Spaces:

X-Pipe
/

flash

Sleeping

App Files Community

add chinese-large embedding optimize

by NickNYU - opened Jun 28, 2023

base: refs/heads/main

←

from: refs/pr/4

Discussion Files changed

+508

-603

Files changed (31) hide show

.gitattributes +35 -0
.gitignore +0 -1
.idea/.gitignore +0 -8
.idea/inspectionProfiles/profiles_settings.xml +0 -6
.idea/llama-xpipe.iml +0 -11
.idea/misc.xml +0 -4
.idea/modules.xml +0 -8
.idea/vcs.xml +0 -6
app.py +43 -45
core/__pycache__/__init__.cpython-310.pyc +0 -0
core/__pycache__/lifecycle.cpython-310.pyc +0 -0
core/__pycache__/logger_factory.cpython-310.pyc +0 -0
core/helper.py +5 -12
core/lifecycle.py +184 -184
core/test_lifecycle.py +3 -0
dataset/docstore.json +0 -0
dataset/index_store.json +1 -1
dataset/vector_store.json +0 -0
docs/docs.pkl +0 -0
github_retriever.py +63 -0
langchain_manager/__pycache__/__init__.cpython-310.pyc +0 -0
langchain_manager/manager.py +4 -35
llama/{storage_context.py → context.py} +60 -20
llama/data_loader.py +5 -6
llama/index.py +18 -0
llama/service_context.py +0 -142
llama/vector_storage.py +18 -0
local-requirements.txt +0 -1
requirements.txt +6 -10
xpipe_wiki/manager_factory.py +10 -45
xpipe_wiki/robot_manager.py +4 -9

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore CHANGED Viewed

@@ -56,7 +56,6 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 .ruff_cache
-wandb/
 # Translations
 *.mo

 .hypothesis/
 .pytest_cache/
 .ruff_cache
 # Translations
 *.mo

.idea/.gitignore DELETED Viewed

@@ -1,8 +0,0 @@
-# Default ignored files
-/shelf/
-/workspace.xml
-# Editor-based HTTP Client requests
-/httpRequests/
-# Datasource local storage ignored files
-/dataSources/
-/dataSources.local.xml

.idea/inspectionProfiles/profiles_settings.xml DELETED Viewed

@@ -1,6 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <settings>
-    <option name="USE_PROJECT_PROFILE" value="false" />
-    <version value="1.0" />
-  </settings>
-</component>

.idea/llama-xpipe.iml DELETED Viewed

@@ -1,11 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$">
-      <excludeFolder url="file://$MODULE_DIR$/.venv" />
-      <excludeFolder url="file://$MODULE_DIR$/venv" />
-    </content>
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-</module>

.idea/misc.xml DELETED Viewed

@@ -1,4 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (llama-xpipe)" project-jdk-type="Python SDK" />
-</project>

.idea/modules.xml DELETED Viewed

@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/llama-xpipe.iml" filepath="$PROJECT_DIR$/.idea/llama-xpipe.iml" />
-    </modules>
-  </component>
-</project>

.idea/vcs.xml DELETED Viewed

@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="" vcs="Git" />
-  </component>
-</project>

app.py CHANGED Viewed

@@ -1,45 +1,43 @@
-import logging
-import sys
-import streamlit as st
-from dotenv import load_dotenv
-from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
-logging.basicConfig(
-    stream=sys.stdout, level=logging.INFO
-)  # logging.DEBUG for more verbose output
-# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
-# # Sidebar contents
-with st.sidebar:
-    st.title("🤗💬 LLM Chat App")
-    st.markdown(
-        """
-    ## About
-    This app is an LLM-powered chatbot built using:
-    - [Streamlit](https://streamlit.io/)
-    - [LangChain](https://python.langchain.com/)
-    - [X-Pipe](https://github.com/ctripcorp/x-pipe)
-    """
-    )
-    # add_vertical_space(5)
-    st.write("Made by Nick")
-def main() -> None:
-    st.header("X-Pipe Wiki 机器人 💬")
-    robot_manager = XPipeRobotManagerFactory.get_or_create(
-        XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0
-    )
-    robot = robot_manager.get_robot()
-    query = st.text_input("X-Pipe Wiki 问题:")
-    if query:
-        response = robot.ask(question=query)
-        st.write(response)
-if __name__ == "__main__":
-    load_dotenv()
-    main()

+import logging
+import sys
+import streamlit as st
+from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
+from xpipe_wiki.robot_manager import XPipeWikiRobot, AzureOpenAIXPipeWikiRobot
+logging.basicConfig(
+    stream=sys.stdout, level=logging.DEBUG
+)  # logging.DEBUG for more verbose output
+logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+# Sidebar contents
+with st.sidebar:
+    st.title("🤗💬 LLM Chat App")
+    st.markdown(
+        """
+    ## About
+    This app is an LLM-powered chatbot built using:
+    - [Streamlit](https://streamlit.io/)
+    - [LangChain](https://python.langchain.com/)
+    - [X-Pipe](https://github.com/ctripcorp/x-pipe)
+    """
+    )
+    # add_vertical_space(5)
+    st.write("Made by Nick")
+def main() -> None:
+    st.header("X-Pipe Wiki 机器人 💬")
+    robot_manager = XPipeRobotManagerFactory.get_or_create(
+        XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0
+    )
+    robot = robot_manager.get_robot()
+    query = st.text_input("X-Pipe Wiki 问题:")
+    if query:
+        response = robot.ask(question=query)
+        st.write(response)
+if __name__ == "__main__":
+    main()

core/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/core/__pycache__/__init__.cpython-310.pyc and b/core/__pycache__/__init__.cpython-310.pyc differ

core/__pycache__/lifecycle.cpython-310.pyc CHANGED Viewed

Binary files a/core/__pycache__/lifecycle.cpython-310.pyc and b/core/__pycache__/lifecycle.cpython-310.pyc differ

core/__pycache__/logger_factory.cpython-310.pyc CHANGED Viewed

Binary files a/core/__pycache__/logger_factory.cpython-310.pyc and b/core/__pycache__/logger_factory.cpython-310.pyc differ

core/helper.py CHANGED Viewed

@@ -2,30 +2,23 @@ from core.lifecycle import Lifecycle
 class LifecycleHelper:
     @classmethod
     def initialize_if_possible(cls, ls: Lifecycle) -> None:
-        if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_initialize(
-            ls.lifecycle_state.phase
-        ):
             ls.initialize()
     @classmethod
     def start_if_possible(cls, ls: Lifecycle) -> None:
-        if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_start(
-            ls.lifecycle_state.phase
-        ):
             ls.start()
     @classmethod
     def stop_if_possible(cls, ls: Lifecycle) -> None:
-        if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_stop(
-            ls.lifecycle_state.phase
-        ):
             ls.stop()
     @classmethod
     def dispose_if_possible(cls, ls: Lifecycle) -> None:
-        if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_dispose(
-            ls.lifecycle_state.phase
-        ):
             ls.dispose()

 class LifecycleHelper:
     @classmethod
     def initialize_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_initialize(ls.lifecycle_state.phase):
             ls.initialize()
     @classmethod
     def start_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_start(ls.lifecycle_state.phase):
             ls.start()
     @classmethod
     def stop_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_stop(ls.lifecycle_state.phase):
             ls.stop()
     @classmethod
     def dispose_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_dispose(ls.lifecycle_state.phase):
             ls.dispose()

core/lifecycle.py CHANGED Viewed

@@ -1,184 +1,184 @@
-import enum
-from abc import ABC, abstractmethod
-from typing import TypeVar, Optional
-from core import logger_factory
-class Initializable(ABC):
-    @abstractmethod
-    def initialize(self) -> None:
-        pass
-class Startable(ABC):
-    @abstractmethod
-    def start(self) -> None:
-        pass
-class Stoppable(ABC):
-    @abstractmethod
-    def stop(self) -> None:
-        pass
-class Disposable(ABC):
-    @abstractmethod
-    def dispose(self) -> None:
-        pass
-class LifecycleAware(ABC):
-    def __init__(self, state: "LifecycleState") -> None:
-        """
-        Args:
-            state(LifecycleState): lifecycle state
-        """
-        self.state = state
-    def get_lifecycle_state(self) -> "LifecycleState":
-        return self.state
-class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
-    def __init__(self) -> None:
-        self.logger = logger_factory.get_logger(self.__class__.__name__)
-        self.lifecycle_state = LifecycleState(lifecycle=self)
-    def initialize(self) -> None:
-        if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
-            self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
-            return
-        self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
-        self.do_init()
-        self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
-    def start(self) -> None:
-        if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
-            self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
-            return
-        self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
-        self.do_start()
-        self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
-    def stop(self) -> None:
-        if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
-            self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
-            return
-        self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
-        self.do_stop()
-        self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
-    def dispose(self) -> None:
-        if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
-            self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
-            return
-        self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
-        self.do_dispose()
-        self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
-    @abstractmethod
-    def do_init(self) -> None:
-        pass
-    @abstractmethod
-    def do_start(self) -> None:
-        pass
-    @abstractmethod
-    def do_stop(self) -> None:
-        pass
-    @abstractmethod
-    def do_dispose(self) -> None:
-        pass
-class LifecyclePhase(enum.Enum):
-    INITIALIZING = 1
-    INITIALIZED = 2
-    STARTING = 3
-    STARTED = 4
-    STOPPING = 5
-    STOPPED = 6
-    DISPOSING = 7
-    DISPOSED = 8
-class LifecycleController(ABC):
-    def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
-        return phase is None or phase == LifecyclePhase.DISPOSED
-    def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
-        return phase is not None and (
-            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
-        )
-    def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
-        return phase is not None and phase == LifecyclePhase.STARTED
-    def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
-        return phase is not None and (
-            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
-        )
-LS = TypeVar("LS", bound=Lifecycle)
-class LifecycleState(LifecycleController, ABC):
-    phase: Optional[LifecyclePhase]
-    def __init__(self, lifecycle: LS) -> None:
-        self.phase = None
-        self.prev_phase = None
-        self.lifecycle = lifecycle
-        self.logger = logger_factory.get_logger(__name__)
-    def is_initializing(self) -> bool:
-        return self.phase == LifecyclePhase.INITIALIZING
-    def is_initialized(self) -> bool:
-        return self.phase == LifecyclePhase.INITIALIZED
-    def is_starting(self) -> bool:
-        return self.phase == LifecyclePhase.STARTING
-    def is_started(self) -> bool:
-        return self.phase == LifecyclePhase.STARTED
-    def is_stopping(self) -> bool:
-        return self.phase == LifecyclePhase.STOPPING
-    def is_stopped(self) -> bool:
-        return self.phase == LifecyclePhase.STOPPED
-    def is_disposing(self) -> bool:
-        return self.phase == LifecyclePhase.DISPOSING
-    def is_disposed(self) -> bool:
-        return self.phase == LifecyclePhase.DISPOSED
-    def get_phase(self) -> Optional[LifecyclePhase]:
-        return self.phase
-    def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
-        prev = "None"
-        if self.phase is not None:
-            prev = self.phase.name
-        current = "None"
-        if phase is not None:
-            current = phase.name
-        self.logger.info(
-            "[setPhaseName][{}]{} --> {}".format(
-                self.lifecycle.__class__.__name__,
-                prev,
-                current,
-            )
-        )
-        self.phase = phase
-    def rollback(self, err: Exception) -> None:
-        self.phase = self.prev_phase
-        self.prev_phase = None

+import enum
+from abc import ABC, abstractmethod
+from typing import TypeVar, Optional
+from core import logger_factory
+class Initializable(ABC):
+    @abstractmethod
+    def initialize(self) -> None:
+        pass
+class Startable(ABC):
+    @abstractmethod
+    def start(self) -> None:
+        pass
+class Stoppable(ABC):
+    @abstractmethod
+    def stop(self) -> None:
+        pass
+class Disposable(ABC):
+    @abstractmethod
+    def dispose(self) -> None:
+        pass
+class LifecycleAware(ABC):
+    def __init__(self, state: "LifecycleState") -> None:
+        """
+        Args:
+            state(LifecycleState): lifecycle state
+        """
+        self.state = state
+    def get_lifecycle_state(self) -> "LifecycleState":
+        return self.state
+class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
+    def __init__(self) -> None:
+        self.logger = logger_factory.get_logger(self.__class__.__name__)
+        self.lifecycle_state = LifecycleState(lifecycle=self)
+    def initialize(self) -> None:
+        if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
+            self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
+            return
+        self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
+        self.do_init()
+        self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
+    def start(self) -> None:
+        if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
+            self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
+            return
+        self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
+        self.do_start()
+        self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
+    def stop(self) -> None:
+        if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
+            self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
+            return
+        self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
+        self.do_stop()
+        self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
+    def dispose(self) -> None:
+        if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
+            self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
+            return
+        self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
+        self.do_dispose()
+        self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
+    @abstractmethod
+    def do_init(self) -> None:
+        pass
+    @abstractmethod
+    def do_start(self) -> None:
+        pass
+    @abstractmethod
+    def do_stop(self) -> None:
+        pass
+    @abstractmethod
+    def do_dispose(self) -> None:
+        pass
+class LifecyclePhase(enum.Enum):
+    INITIALIZING = 1
+    INITIALIZED = 2
+    STARTING = 3
+    STARTED = 4
+    STOPPING = 5
+    STOPPED = 6
+    DISPOSING = 7
+    DISPOSED = 8
+class LifecycleController(ABC):
+    def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
+        return phase is None or phase == LifecyclePhase.DISPOSED
+    def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
+        return phase is not None and (
+                phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+        )
+    def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
+        return phase is not None and phase == LifecyclePhase.STARTED
+    def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
+        return phase is not None and (
+                phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+        )
+LS = TypeVar("LS", bound=Lifecycle)
+class LifecycleState(LifecycleController, ABC):
+    phase: Optional[LifecyclePhase]
+    def __init__(self, lifecycle: LS) -> None:
+        self.phase = None
+        self.prev_phase = None
+        self.lifecycle = lifecycle
+        self.logger = logger_factory.get_logger(__name__)
+    def is_initializing(self) -> bool:
+        return self.phase == LifecyclePhase.INITIALIZING
+    def is_initialized(self) -> bool:
+        return self.phase == LifecyclePhase.INITIALIZED
+    def is_starting(self) -> bool:
+        return self.phase == LifecyclePhase.STARTING
+    def is_started(self) -> bool:
+        return self.phase == LifecyclePhase.STARTED
+    def is_stopping(self) -> bool:
+        return self.phase == LifecyclePhase.STOPPING
+    def is_stopped(self) -> bool:
+        return self.phase == LifecyclePhase.STOPPED
+    def is_disposing(self) -> bool:
+        return self.phase == LifecyclePhase.DISPOSING
+    def is_disposed(self) -> bool:
+        return self.phase == LifecyclePhase.DISPOSED
+    def get_phase(self) -> Optional[LifecyclePhase]:
+        return self.phase
+    def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
+        prev = "None"
+        if self.phase is not None:
+            prev = self.phase.name
+        current = "None"
+        if phase is not None:
+            current = phase.name
+        self.logger.info(
+            "[setPhaseName][{}]{} --> {}".format(
+                self.lifecycle.__class__.__name__,
+                prev,
+                current,
+            )
+        )
+        self.phase = phase
+    def rollback(self, err: Exception) -> None:
+        self.phase = self.prev_phase
+        self.prev_phase = None

core/test_lifecycle.py CHANGED Viewed

@@ -1,7 +1,10 @@
 from unittest import TestCase
 from core.lifecycle import Lifecycle
 class SubLifecycle(Lifecycle):
     def __init__(self) -> None:

+import logging
 from unittest import TestCase
 from core.lifecycle import Lifecycle
+logging.basicConfig()
 class SubLifecycle(Lifecycle):
     def __init__(self) -> None:

dataset/docstore.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

dataset/index_store.json CHANGED Viewed

@@ -1 +1 @@

- {"index_store/data": {"da495c94-4541-47e1-b93f-8535192a5f28": {"__type__": "vector_store", "__data__": "{\"index_id\": \"da495c94-4541-47e1-b93f-8535192a5f28\", \"summary\": null, \"nodes_dict\": {\"59108663-a5e1-4e3e-bb21-626158eef136\": \"59108663-a5e1-4e3e-bb21-626158eef136\", \"50de4ec9-febb-466f-9f9a-cc9296895e83\": \"50de4ec9-febb-466f-9f9a-cc9296895e83\", \"aa413a53-0dda-4ac4-8ae9-6e8e340bb4f0\": \"aa413a53-0dda-4ac4-8ae9-6e8e340bb4f0\", \"a0cc4323-ec8f-4fed-9401-e44125134341\": \"a0cc4323-ec8f-4fed-9401-e44125134341\", \"5321cc7b-2a86-48b8-b56c-415dde7c149b\": \"5321cc7b-2a86-48b8-b56c-415dde7c149b\", \"9e19fb91-8258-4aca-9692-2d027073499e\": \"9e19fb91-8258-4aca-9692-2d027073499e\", \"02e856e5-4211-4a27-9204-e966907f1d74\": \"02e856e5-4211-4a27-9204-e966907f1d74\", \"f3074870-8fbf-4322-b1d2-2111e6aac9af\": \"f3074870-8fbf-4322-b1d2-2111e6aac9af\", \"82677fb9-abe3-4038-8263-5576c47da4f2\": \"82677fb9-abe3-4038-8263-5576c47da4f2\", \"a08364a6-c23d-4df5-8b5d-84137fbebd4e\": \"a08364a6-c23d-4df5-8b5d-84137fbebd4e\", \"e45b082d-c3ec-45aa-b630-6db49a62728b\": \"e45b082d-c3ec-45aa-b630-6db49a62728b\", \"2c55445c-04b1-4705-9871-adaa02f38f1b\": \"2c55445c-04b1-4705-9871-adaa02f38f1b\", \"d0de9736-ccad-450e-b4a1-49d4cdb8b941\": \"d0de9736-ccad-450e-b4a1-49d4cdb8b941\", \"fd0d2375-39e2-4bce-8e39-1182a122a1b4\": \"fd0d2375-39e2-4bce-8e39-1182a122a1b4\", 
\"13221de7-6c68-4367-b1be-f35b06fc3a74\": \"13221de7-6c68-4367-b1be-f35b06fc3a74\", \"9f448401-cda9-4b5f-9a80-c79e111f9963\": \"9f448401-cda9-4b5f-9a80-c79e111f9963\", \"3bc7dfc2-3ddf-4384-a60c-6cd52e1314f4\": \"3bc7dfc2-3ddf-4384-a60c-6cd52e1314f4\", \"ce3e530c-ce2d-4f5f-a171-72a790c3c624\": \"ce3e530c-ce2d-4f5f-a171-72a790c3c624\", \"85f764bd-e560-48ba-a51e-2287b6fe19db\": \"85f764bd-e560-48ba-a51e-2287b6fe19db\", \"3a8e4c7c-9f7d-4735-93e7-9d847cff98de\": \"3a8e4c7c-9f7d-4735-93e7-9d847cff98de\", \"af881b61-03f4-4851-8946-794015e3436c\": \"af881b61-03f4-4851-8946-794015e3436c\", \"31579820-439e-4029-b8c4-a0d6528daa59\": \"31579820-439e-4029-b8c4-a0d6528daa59\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}

+ {"index_store/data": {"0928a9d6-bc3f-467f-9464-1d03e5aa155d": {"__type__": "vector_store", "__data__": "{\"index_id\": \"0928a9d6-bc3f-467f-9464-1d03e5aa155d\", \"summary\": null, \"nodes_dict\": {\"a763d37e-d6ef-42ad-b0ce-143ed9698d4b\": \"a763d37e-d6ef-42ad-b0ce-143ed9698d4b\", \"9d8b60d0-64b1-4074-8fb3-a33c961270ed\": \"9d8b60d0-64b1-4074-8fb3-a33c961270ed\", \"b051986d-d812-4326-b811-4ae17ae3c012\": \"b051986d-d812-4326-b811-4ae17ae3c012\", \"68d8c48a-1f9d-4b05-bf9a-e3b9490821d5\": \"68d8c48a-1f9d-4b05-bf9a-e3b9490821d5\", \"8cfea388-ca09-4fb3-88d5-8570f6231d28\": \"8cfea388-ca09-4fb3-88d5-8570f6231d28\", \"f1edfc04-f95c-40c4-8d95-e7cd42745b62\": \"f1edfc04-f95c-40c4-8d95-e7cd42745b62\", \"1ec3388c-c4e5-4706-a412-525b68481002\": \"1ec3388c-c4e5-4706-a412-525b68481002\", \"c2fcee2a-5c0d-4d26-86a0-273e87963874\": \"c2fcee2a-5c0d-4d26-86a0-273e87963874\", \"bd151167-8a9b-47e4-be4b-b03cb10c65b1\": \"bd151167-8a9b-47e4-be4b-b03cb10c65b1\", \"3f5d936e-80a3-463c-ae17-fd9c376ff3e1\": \"3f5d936e-80a3-463c-ae17-fd9c376ff3e1\", \"91ab0928-bc7a-4fef-8693-308ad9764ef4\": \"91ab0928-bc7a-4fef-8693-308ad9764ef4\", \"fae01dbb-1bd5-47b5-989f-38f6029e2f4c\": \"fae01dbb-1bd5-47b5-989f-38f6029e2f4c\", \"3424533f-5d8c-4149-8ffe-3b53cadfb4fb\": \"3424533f-5d8c-4149-8ffe-3b53cadfb4fb\", \"00e72630-a81a-4bd8-bebe-a52f47bd2087\": \"00e72630-a81a-4bd8-bebe-a52f47bd2087\", \"a32f87c5-45c7-4e21-85cb-d49b6dd759ab\": \"a32f87c5-45c7-4e21-85cb-d49b6dd759ab\", \"4593da6f-276b-4f80-bb95-00e23bafb74b\": \"4593da6f-276b-4f80-bb95-00e23bafb74b\", \"578f3e77-3ec2-4b86-a2f8-a0a34d9ce810\": \"578f3e77-3ec2-4b86-a2f8-a0a34d9ce810\", \"8fc8d087-6e09-4fb4-bac0-5fa6bfed7a15\": \"8fc8d087-6e09-4fb4-bac0-5fa6bfed7a15\", \"c38b7421-554a-49eb-b0cc-7c0de3723ef9\": \"c38b7421-554a-49eb-b0cc-7c0de3723ef9\", \"739c5748-36c1-4087-b926-419639d4da27\": \"739c5748-36c1-4087-b926-419639d4da27\", \"b80c39d8-a895-48d1-a2dc-dc13ab03fb0a\": \"b80c39d8-a895-48d1-a2dc-dc13ab03fb0a\", 
\"01d4860e-11ef-430d-9b9c-ee1bb34680f7\": \"01d4860e-11ef-430d-9b9c-ee1bb34680f7\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}

dataset/vector_store.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

docs/docs.pkl CHANGED Viewed

Binary files a/docs/docs.pkl and b/docs/docs.pkl differ

github_retriever.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from llama_hub.github_repo import GithubRepositoryReader, GithubClient
+from llama_index import download_loader, GPTVectorStoreIndex
+from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext
+from langchain.llms import AzureOpenAI
+from langchain.embeddings.openai import OpenAIEmbeddings
+from llama_index import LangchainEmbedding, ServiceContext
+from llama_index import StorageContext, load_index_from_storage
+from dotenv import load_dotenv
+import os
+import pickle
+def main() -> None:
+    # define embedding
+    embedding = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))
+    # define LLM
+    llm_predictor = LLMPredictor(
+        llm=AzureOpenAI(
+            engine="text-davinci-003",
+            model_name="text-davinci-003",
+        )
+    )
+    # configure service context
+    service_context = ServiceContext.from_defaults(
+        llm_predictor=llm_predictor, embed_model=embedding
+    )
+    download_loader("GithubRepositoryReader")
+    docs = None
+    if os.path.exists("docs/docs.pkl"):
+        with open("docs/docs.pkl", "rb") as f:
+            docs = pickle.load(f)
+    if docs is None:
+        github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
+        loader = GithubRepositoryReader(
+            github_client,
+            owner="ctripcorp",
+            repo="x-pipe",
+            filter_directories=(
+                [".", "doc"],
+                GithubRepositoryReader.FilterType.INCLUDE,
+            ),
+            filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
+            verbose=True,
+            concurrent_requests=10,
+        )
+        docs = loader.load_data(branch="master")
+        with open("docs/docs.pkl", "wb") as f:
+            pickle.dump(docs, f)
+    index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
+    query_engine = index.as_query_engine(service_context=service_context)
+    response = query_engine.query("如何使用X-Pipe?")
+    print(response)
+if __name__ == "__main__":
+    load_dotenv()
+    main()

langchain_manager/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (154 Bytes). View file

langchain_manager/manager.py CHANGED Viewed

@@ -5,6 +5,8 @@ from langchain.embeddings.base import Embeddings as LCEmbeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.llms import AzureOpenAI
 class BaseLangChainManager(ABC):
     def __init__(self) -> None:
@@ -22,49 +24,16 @@ class BaseLangChainManager(ABC):
 class LangChainAzureManager(BaseLangChainManager):
     def __init__(self) -> None:
         super().__init__()
-        self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
-        self.llm = AzureOpenAI(
-            deployment_name="text-davinci-003",
-            # model_name="text-davinci-003",
-            model="text-davinci-003",
-            client=None,
-            # temperature set to 0.0(default 0.7) to get a certain answer from OpenAI,
-            # as a wiki robot we won't want to get flexible answers
-            temperature=0.0,
-            # GPT-3 default is 4096, however, openai.py default is 256
-            max_tokens=2048,
-        )
     # Override
     def get_embedding(self) -> LCEmbeddings:
-        return self.embedding
     # Override
     def get_llm(self) -> BaseLanguageModel:
-        return self.llm
-class LangChainHuggingFaceManager(BaseLangChainManager):
-    def __init__(self) -> None:
-        super().__init__()
-        from transformers import AutoTokenizer, AutoModel
-        AutoTokenizer.from_pretrained("GanymedeNil/text2vec-large-chinese")
-        AutoModel.from_pretrained("GanymedeNil/text2vec-large-chinese")
-        self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
-        self.llm = AzureOpenAI(
             deployment_name="text-davinci-003",
             # model_name="text-davinci-003",
             model="text-davinci-003",
             client=None,
         )
-    # Override
-    def get_embedding(self) -> LCEmbeddings:
-        return self.embedding
-    # Override
-    def get_llm(self) -> BaseLanguageModel:
-        return self.llm

 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.llms import AzureOpenAI
+from core.lifecycle import Lifecycle
 class BaseLangChainManager(ABC):
     def __init__(self) -> None:
 class LangChainAzureManager(BaseLangChainManager):
     def __init__(self) -> None:
         super().__init__()
     # Override
     def get_embedding(self) -> LCEmbeddings:
+        return OpenAIEmbeddings(client=None, chunk_size=1)
     # Override
     def get_llm(self) -> BaseLanguageModel:
+        return AzureOpenAI(
             deployment_name="text-davinci-003",
             # model_name="text-davinci-003",
             model="text-davinci-003",
             client=None,
         )

llama/{storage_context.py → context.py} RENAMED Viewed

@@ -1,14 +1,62 @@
-from llama_index import StorageContext
-from typing import List
 from abc import abstractmethod, ABC
-from llama_index import Document
 from core.lifecycle import Lifecycle
-from llama.service_context import ServiceContextManager
 class StorageContextManager(Lifecycle, ABC):
     @abstractmethod
     def get_storage_context(self) -> StorageContext:
         pass
@@ -17,11 +65,9 @@ class StorageContextManager(Lifecycle, ABC):
 class LocalStorageContextManager(StorageContextManager):
     storage_context: StorageContext
-    def __init__(
-        self,
-        service_context_manager: ServiceContextManager,
-        dataset_path: str = "./dataset",
-    ) -> None:
         super().__init__()
         self.dataset_path = dataset_path
         self.service_context_manager = service_context_manager
@@ -31,11 +77,8 @@ class LocalStorageContextManager(StorageContextManager):
     def do_init(self) -> None:
         from llama.utils import is_local_storage_files_ready
         if is_local_storage_files_ready(self.dataset_path):
-            self.storage_context = StorageContext.from_defaults(
-                persist_dir=self.dataset_path
-            )
         else:
             docs = self._download()
             self._indexing(docs)
@@ -51,17 +94,14 @@ class LocalStorageContextManager(StorageContextManager):
     def do_dispose(self) -> None:
         self.storage_context.persist(self.dataset_path)
-    def _download(self) -> List[Document]:
         from llama.data_loader import GithubLoader
         loader = GithubLoader()
         return loader.load()
-    def _indexing(self, docs: List[Document]) -> None:
         from llama_index import GPTVectorStoreIndex
-        index = GPTVectorStoreIndex.from_documents(
-            docs, service_context=self.service_context_manager.get_service_context()
-        )
         index.storage_context.persist(persist_dir=self.dataset_path)
         self.storage_context = index.storage_context

 from abc import abstractmethod, ABC
+from typing import List
+from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding, Document
+from llama_index import StorageContext
 from core.lifecycle import Lifecycle
+from langchain_manager.manager import BaseLangChainManager
+class ServiceContextManager(Lifecycle, ABC):
+    """Abstract lifecycle component that exposes a llama_index ``ServiceContext``."""
+    @abstractmethod
+    def get_service_context(self) -> ServiceContext:
+        """Return the service context owned by this manager."""
+class AzureServiceContextManager(ServiceContextManager):
+    """Build and expose a ``ServiceContext`` from a LangChain manager.
+
+    The context is created in ``do_init`` from the embedding and LLM returned
+    by ``lc_manager``; the other lifecycle hooks only log token counters.
+    """
+    lc_manager: BaseLangChainManager
+    # Annotation only: no attribute exists until do_init() assigns one.
+    service_context: ServiceContext
+    def __init__(self, lc_manager: BaseLangChainManager):
+        """Store the LangChain manager; the service context is built later."""
+        super().__init__()
+        self.lc_manager = lc_manager
+    def get_service_context(self) -> ServiceContext:
+        """Return the service context built by ``do_init``.
+
+        Raises:
+            ValueError: if no service context has been built yet.
+        """
+        # A bare class-level annotation creates no attribute, so reading
+        # self.service_context before do_init() would raise AttributeError and
+        # the readiness check below would never run. Look it up defensively.
+        service_context = getattr(self, "service_context", None)
+        if service_context is None:
+            raise ValueError(
+                "service context is not ready, check for lifecycle statement"
+            )
+        return service_context
+    def do_init(self) -> None:
+        """Create the service context from the manager's embedding and LLM."""
+        # define embedding
+        embedding = LangchainEmbedding(self.lc_manager.get_embedding())
+        # define LLM
+        llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
+        # configure service context
+        self.service_context = ServiceContext.from_defaults(
+            llm_predictor=llm_predictor, embed_model=embedding
+        )
+    def do_start(self) -> None:
+        """Log the embedding and predictor token counters at start."""
+        self.logger.info(
+            "[do_start][embedding] last used usage: %d",
+            self.service_context.embed_model.total_tokens_used,
+        )
+        self.logger.info(
+            "[do_start][predict] last used usage: %d",
+            self.service_context.llm_predictor.total_tokens_used,
+        )
+    def do_stop(self) -> None:
+        """Log the embedding and predictor token counters at stop."""
+        self.logger.info(
+            "[do_stop][embedding] last used usage: %d",
+            self.service_context.embed_model.total_tokens_used,
+        )
+        self.logger.info(
+            "[do_stop][predict] last used usage: %d",
+            self.service_context.llm_predictor.total_tokens_used,
+        )
+    def do_dispose(self) -> None:
+        """Log the predictor token counter on dispose."""
+        # Only the predictor's counter is reported here, not the embedding's.
+        self.logger.info(
+            "[do_dispose] total used token: %d",
+            self.service_context.llm_predictor.total_tokens_used,
+        )
 class StorageContextManager(Lifecycle, ABC):
     @abstractmethod
     def get_storage_context(self) -> StorageContext:
         pass
 class LocalStorageContextManager(StorageContextManager):
     storage_context: StorageContext
+    def __init__(self,
+                 dataset_path: str = "./dataset",
+                 service_context_manager: ServiceContextManager = None) -> None:
         super().__init__()
         self.dataset_path = dataset_path
         self.service_context_manager = service_context_manager
     def do_init(self) -> None:
         from llama.utils import is_local_storage_files_ready
         if is_local_storage_files_ready(self.dataset_path):
+            self.storage_context = StorageContext.from_defaults(persist_dir=self.dataset_path)
         else:
             docs = self._download()
             self._indexing(docs)
     def do_dispose(self) -> None:
         self.storage_context.persist(self.dataset_path)
+    def _download(self) -> List[Document]:
+        """Load the documents through ``GithubLoader`` and return them."""
+        # `[Document]` was a list literal, not a type; List[Document] is the
+        # annotation type checkers accept.
         from llama.data_loader import GithubLoader
         loader = GithubLoader()
         return loader.load()
+    def _indexing(self, docs: List[Document]) -> None:
+        """Build a vector index from ``docs``, persist it and keep its storage context.
+
+        Raises:
+            ValueError: if no service context manager was supplied.
+        """
+        # The constructor defaults service_context_manager to None; fail with
+        # a clear message instead of an AttributeError on None.
+        if self.service_context_manager is None:
+            raise ValueError("service_context_manager is required to build the index")
         from llama_index import GPTVectorStoreIndex
+        index = GPTVectorStoreIndex.from_documents(
+            docs,
+            service_context=self.service_context_manager.get_service_context(),
+        )
         index.storage_context.persist(persist_dir=self.dataset_path)
         self.storage_context = index.storage_context

llama/data_loader.py CHANGED Viewed

@@ -16,10 +16,10 @@ class WikiLoader(ABC):
 class GithubLoader(WikiLoader):
     def __init__(
-        self,
-        github_owner: Optional[str] = None,
-        repo: Optional[str] = None,
-        dirs: Optional[Sequence[str]] = None,
     ):
         super().__init__()
         self.owner = (
@@ -51,8 +51,7 @@ class GithubLoader(WikiLoader):
             verbose=True,
             concurrent_requests=10,
         )
-        os.environ["http_proxy"] = "http://127.0.0.1:7890"
-        os.environ["https_proxy"] = "http://127.0.0.1:7890"
         docs = loader.load_data(branch="master")
         with open("docs/docs.pkl", "wb") as f:

 class GithubLoader(WikiLoader):
     def __init__(
+            self,
+            github_owner: Optional[str] = None,
+            repo: Optional[str] = None,
+            dirs: Optional[Sequence[str]] = None,
     ):
         super().__init__()
         self.owner = (
             verbose=True,
             concurrent_requests=10,
         )
         docs = loader.load_data(branch="master")
         with open("docs/docs.pkl", "wb") as f:

llama/index.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from core.lifecycle import Lifecycle
+from llama.context import ServiceContextManager
+from llama_index.indices.vector_store import VectorStoreIndex
+from typing import Optional
+class IndexManager(Lifecycle):
+    """Lifecycle component that hands out a ``VectorStoreIndex`` once started."""
+    # Initialised to None; nothing in this class assigns a real index yet.
+    index: Optional[VectorStoreIndex]
+    def __init__(self, context_manager: ServiceContextManager) -> None:
+        """Keep the service context manager and start with no index."""
+        super().__init__()
+        self.index = None
+        self.context_manager = context_manager
+    def get_index(self) -> Optional[VectorStoreIndex]:
+        """Return the managed index, which may be ``None``.
+
+        Raises:
+            RuntimeError: if the lifecycle state reports it is not started.
+        """
+        if not self.lifecycle_state.is_started():
+            # RuntimeError subclasses Exception, so existing handlers still match.
+            raise RuntimeError("Lifecycle state is not correct")
+        return self.index

llama/service_context.py DELETED Viewed

@@ -1,142 +0,0 @@
-from abc import abstractmethod, ABC
-from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
-from core.lifecycle import Lifecycle
-from langchain_manager.manager import BaseLangChainManager
-# def get_callback_manager() -> CallbackManager:
-#     from llama_index.callbacks import (
-#         WandbCallbackHandler,
-#         CallbackManager,
-#         LlamaDebugHandler,
-#     )
-#     llama_debug = LlamaDebugHandler(print_trace_on_end=True)
-#     # wandb.init args
-#     run_args = dict(
-#         project="llamaindex",
-#     )
-#     wandb_callback = WandbCallbackHandler(run_args=run_args)
-#     return CallbackManager([llama_debug, wandb_callback])
-class ServiceContextManager(Lifecycle, ABC):
-    @abstractmethod
-    def get_service_context(self) -> ServiceContext:
-        pass
-class AzureServiceContextManager(ServiceContextManager):
-    lc_manager: BaseLangChainManager
-    service_context: ServiceContext
-    def __init__(self, lc_manager: BaseLangChainManager):
-        super().__init__()
-        self.lc_manager = lc_manager
-    def get_service_context(self) -> ServiceContext:
-        if self.service_context is None:
-            raise ValueError(
-                "service context is not ready, check for lifecycle statement"
-            )
-        return self.service_context
-    def do_init(self) -> None:
-        # define embedding
-        embedding = LangchainEmbedding(self.lc_manager.get_embedding())
-        # define LLM
-        llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
-        # configure service context
-        self.service_context = ServiceContext.from_defaults(
-            llm_predictor=llm_predictor,
-            embed_model=embedding,
-            # callback_manager=get_callback_manager(),
-        )
-    def do_start(self) -> None:
-        self.logger.info(
-            "[do_start][embedding] last used usage: %d",
-            self.service_context.embed_model.total_tokens_used,
-        )
-        self.logger.info(
-            "[do_start][predict] last used usage: %d",
-            self.service_context.llm_predictor.total_tokens_used,
-        )
-    def do_stop(self) -> None:
-        self.logger.info(
-            "[do_stop][embedding] last used usage: %d",
-            self.service_context.embed_model.total_tokens_used,
-        )
-        self.logger.info(
-            "[do_stop][predict] last used usage: %d",
-            self.service_context.llm_predictor.total_tokens_used,
-        )
-    def do_dispose(self) -> None:
-        self.logger.info(
-            "[do_dispose] total used token: %d",
-            self.service_context.llm_predictor.total_tokens_used,
-        )
-class HuggingFaceChineseOptServiceContextManager(ServiceContextManager):
-    lc_manager: BaseLangChainManager
-    service_context: ServiceContext
-    def __init__(self, lc_manager: BaseLangChainManager):
-        super().__init__()
-        self.lc_manager = lc_manager
-    def get_service_context(self) -> ServiceContext:
-        if self.service_context is None:
-            raise ValueError(
-                "service context is not ready, check for lifecycle statement"
-            )
-        return self.service_context
-    def do_init(self) -> None:
-        # define embedding
-        from langchain.embeddings import HuggingFaceEmbeddings
-        model_name = "GanymedeNil/text2vec-large-chinese"
-        hf_embedding = HuggingFaceEmbeddings(
-            model_name=model_name, model_kwargs={"device": "cpu"}
-        )
-        embedding = LangchainEmbedding(hf_embedding)
-        # define LLM
-        llm_predictor = LLMPredictor(self.lc_manager.get_llm())
-        # configure service context
-        self.service_context = ServiceContext.from_defaults(
-            llm_predictor=llm_predictor,
-            embed_model=embedding,
-            # callback_manager=get_callback_manager()
-        )
-    def do_start(self) -> None:
-        self.logger.info(
-            "[do_start][embedding] last used usage: %d",
-            self.service_context.embed_model.total_tokens_used,
-        )
-        self.logger.info(
-            "[do_start][predict] last used usage: %d",
-            self.service_context.llm_predictor.total_tokens_used,
-        )
-    def do_stop(self) -> None:
-        self.logger.info(
-            "[do_stop][embedding] last used usage: %d",
-            self.service_context.embed_model.total_tokens_used,
-        )
-        self.logger.info(
-            "[do_stop][predict] last used usage: %d",
-            self.service_context.llm_predictor.total_tokens_used,
-        )
-    def do_dispose(self) -> None:
-        self.logger.info(
-            "[do_dispose] total used token: %d",
-            self.service_context.llm_predictor.total_tokens_used,
-        )

llama/vector_storage.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from core.lifecycle import Lifecycle
+class VectorStorageManager(Lifecycle):
+    """Lifecycle component for vector storage; every hook is currently a no-op."""
+    def __init__(self) -> None:
+        super().__init__()
+    def do_init(self) -> None:
+        """No initialisation work is performed."""
+    def do_start(self) -> None:
+        """No start-up work is performed."""
+    def do_stop(self) -> None:
+        """No shutdown work is performed."""
+    def do_dispose(self) -> None:
+        """No resources are released."""

local-requirements.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- python-dotenv

requirements.txt CHANGED Viewed

@@ -1,10 +1,6 @@
-llama_index>=0.6.3
-llama_hub
-streamlit
-ruff
-black
-mypy
-accelerate
-python-dotenv
-sentence_transformers
-wandb

+llama_index>=0.6.3
+llama_hub
+ruff
+black
+mypy

xpipe_wiki/manager_factory.py CHANGED Viewed

@@ -4,69 +4,34 @@ import os
 from core.helper import LifecycleHelper
 from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
-from multiprocessing import Lock
-lock = Lock()
 class XPipeRobotRevision(enum.Enum):
     SIMPLE_OPENAI_VERSION_0 = 1
-    HUGGINGFACE_VERSION_0 = 2
-class XPipeRobotManagerFactory:
-    """
-    CAPABLE: Dict[XPipeRobotRevision, XPipeWikiRobotManager] =
-    {XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0: XPipeWikiRobotManager()}
-    """
-    CAPABLE = dict()  # type: dict[XPipeRobotRevision, XPipeWikiRobotManager]
     @classmethod
     def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
-        with lock:
-            if cls.CAPABLE.get(revision) is not None:
-                return cls.CAPABLE[revision]
-            if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
-                manager = cls.create_simple_openai_version_0()
-            elif revision == XPipeRobotRevision.HUGGINGFACE_VERSION_0:
-                manager = cls.create_huggingface_version_0()
-            cls.CAPABLE[revision] = manager
-            return manager
     @classmethod
     def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
-        from llama.service_context import AzureServiceContextManager
         from langchain_manager.manager import LangChainAzureManager
         service_context_manager = AzureServiceContextManager(
             lc_manager=LangChainAzureManager()
         )
-        from llama.storage_context import LocalStorageContextManager
-        dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
-        storage_context_manager = LocalStorageContextManager(
-            dataset_path=dataset_path, service_context_manager=service_context_manager
-        )
-        robot_manager = AzureXPipeWikiRobotManager(
-            service_context_manager=service_context_manager,
-            storage_context_manager=storage_context_manager,
-        )
-        LifecycleHelper.initialize_if_possible(robot_manager)
-        LifecycleHelper.start_if_possible(robot_manager)
-        return robot_manager
-    @classmethod
-    def create_huggingface_version_0(cls) -> AzureXPipeWikiRobotManager:
-        from llama.service_context import HuggingFaceChineseOptServiceContextManager
-        from langchain_manager.manager import LangChainAzureManager
-        service_context_manager = HuggingFaceChineseOptServiceContextManager(
-            lc_manager=LangChainAzureManager()
-        )
-        from llama.storage_context import LocalStorageContextManager
         dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
         storage_context_manager = LocalStorageContextManager(

 from core.helper import LifecycleHelper
 from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
 class XPipeRobotRevision(enum.Enum):
     SIMPLE_OPENAI_VERSION_0 = 1
+CAPABLE = dict()
+class XPipeRobotManagerFactory:
     @classmethod
     def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
+        """Return the cached manager for ``revision``, creating it on first use.
+
+        Raises:
+            ValueError: if ``revision`` has no associated factory method.
+        """
+        cached = CAPABLE.get(revision)
+        if cached is not None:
+            return cached
+        if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
+            manager = cls.create_simple_openai_version_0()
+        else:
+            # Without this branch an unhandled revision falls through with
+            # `manager` unbound and surfaces as an UnboundLocalError.
+            raise ValueError(f"unsupported robot revision: {revision!r}")
+        CAPABLE[revision] = manager
+        return manager
     @classmethod
     def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
+        from llama.context import AzureServiceContextManager
         from langchain_manager.manager import LangChainAzureManager
         service_context_manager = AzureServiceContextManager(
             lc_manager=LangChainAzureManager()
         )
+        from llama.context import LocalStorageContextManager
         dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
         storage_context_manager = LocalStorageContextManager(

xpipe_wiki/robot_manager.py CHANGED Viewed

@@ -3,12 +3,10 @@ from typing import Any
 from llama_index import load_index_from_storage
 from llama_index.indices.query.base import BaseQueryEngine
-from llama_index.indices.response import ResponseMode
 from core.helper import LifecycleHelper
 from core.lifecycle import Lifecycle
-from llama.service_context import ServiceContextManager
-from llama.storage_context import StorageContextManager
 class XPipeWikiRobot(ABC):
@@ -25,10 +23,7 @@ class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
         self.query_engine = query_engine
     def ask(self, question: str) -> Any:
-        print("question: ", question)
-        response = self.query_engine.query(question)
-        print("response type: ", type(response))
-        return response.__str__()
 class XPipeWikiRobotManager(Lifecycle):
@@ -64,10 +59,10 @@ class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
         index = load_index_from_storage(
             storage_context=self.storage_context_manager.get_storage_context(),
             service_context=self.service_context_manager.get_service_context(),
         )
         self.query_engine = index.as_query_engine(
-            service_context=self.service_context_manager.get_service_context(),
-            response_mode=ResponseMode.TREE_SUMMARIZE,
         )
     def do_stop(self) -> None:

 from llama_index import load_index_from_storage
 from llama_index.indices.query.base import BaseQueryEngine
 from core.helper import LifecycleHelper
 from core.lifecycle import Lifecycle
+from llama.context import ServiceContextManager, StorageContextManager
 class XPipeWikiRobot(ABC):
         self.query_engine = query_engine
     def ask(self, question: str) -> Any:
+        return self.query_engine.query(question)
 class XPipeWikiRobotManager(Lifecycle):
         index = load_index_from_storage(
             storage_context=self.storage_context_manager.get_storage_context(),
             service_context=self.service_context_manager.get_service_context(),
         )
         self.query_engine = index.as_query_engine(
+            service_context=self.service_context_manager.get_service_context()
         )
     def do_stop(self) -> None: