Spaces:
Runtime error
Runtime error
Commit Β·
56724c7
0
Parent(s):
Initial commit
Browse files- .gitignore +1 -0
- README.md +45 -0
- qnabot/QnABot.py +85 -0
- qnabot/__init__.py +0 -0
- setup.py +23 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.venv
|
README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# QnA Bot
|
| 2 |
+
|
| 3 |
+
Create a question answering over docs bot with one line of code:
|
| 4 |
+
|
| 5 |
+
```python
|
| 6 |
+
from qnabot import QnABot
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
os.environ["OPENAI_API_KEY"] = "my key"
|
| 10 |
+
|
| 11 |
+
bot = QnABot(directory="./mydata")
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
### Here's how it works
|
| 15 |
+
|
| 16 |
+
High-level overview of what is happening under the hood:
|
| 17 |
+
|
| 18 |
+
```mermaid
|
| 19 |
+
sequenceDiagram
|
| 20 |
+
actor User
|
| 21 |
+
participant API
|
| 22 |
+
participant LLM
|
| 23 |
+
participant Vectorstore
|
| 24 |
+
participant IngestionEngine
|
| 25 |
+
participant DataLake
|
| 26 |
+
autonumber
|
| 27 |
+
|
| 28 |
+
Note over API, DataLake: Ingestion phase
|
| 29 |
+
loop Every X time
|
| 30 |
+
IngestionEngine ->> DataLake: Load documents
|
| 31 |
+
DataLake -->> IngestionEngine: Return data
|
| 32 |
+
IngestionEngine -->> IngestionEngine: Split documents and Create embeddings
|
| 33 |
+
IngestionEngine ->> Vectorstore: Store documents and embeddings
|
| 34 |
+
end
|
| 35 |
+
|
| 36 |
+
Note over API, DataLake: Generation phase
|
| 37 |
+
|
| 38 |
+
User ->> API: Send user question
|
| 39 |
+
API ->> Vectorstore: Lookup documents in the index relevant to the question
|
| 40 |
+
API ->> API: Construct a prompt from the question and any relevant documents
|
| 41 |
+
API ->> LLM: Pass the prompt to the model
|
| 42 |
+
LLM -->> API: Get response from model
|
| 43 |
+
API -->> User: Return response
|
| 44 |
+
|
| 45 |
+
```
|
qnabot/QnABot.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Import necessary libraries and modules
|
| 2 |
+
from langchain.llms import OpenAI
|
| 3 |
+
from langchain.chat_models import ChatOpenAI
|
| 4 |
+
from langchain.embeddings import OpenAIEmbeddings
|
| 5 |
+
from langchain.document_loaders import DirectoryLoader, S3DirectoryLoader
|
| 6 |
+
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
| 7 |
+
from langchain.vectorstores.faiss import FAISS
|
| 8 |
+
import pickle
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class QnABot:
    """Question-answering bot over a directory of documents.

    Builds (or loads from disk) a FAISS vector index over the documents
    found in *directory*, then answers questions by retrieving the most
    similar documents and passing them with the question to an OpenAI model
    via a question-answering-with-sources chain.
    """

    def __init__(
        self,
        directory: str,
        index: str | None = None,
        model: str | None = None,
        temperature: float = 0,
    ):
        """Initialize the bot.

        Args:
            directory: Local path or ``s3://...`` URI holding the documents.
            index: Optional path to a pickled FAISS index. If the file
                exists it is loaded; otherwise a new index is built from
                the documents in *directory*.
            model: ``"gpt-3.5-turbo"`` (default when ``None``) or
                ``"text-davinci-003"``.
            temperature: Sampling temperature forwarded to the LLM.

        Raises:
            ValueError: If *model* is not one of the supported names.
        """
        # Select a model, create a document loader, and load or build
        # the vector index before wiring up the QA chain.
        self.select_model(model, temperature)
        self.create_loader(directory)
        self.load_or_create_index(index)

        # Load the question-answering (with sources) chain for the model.
        self.chain = load_qa_with_sources_chain(self.llm)

    def select_model(self, model: str | None, temperature: float):
        """Set ``self.llm`` according to the requested model name.

        Raises:
            ValueError: For unsupported model names. Failing fast here is
                clearer than leaving ``self.llm`` unset, which would only
                surface later as an AttributeError in ``__init__``.
        """
        if model is None or model == "gpt-3.5-turbo":
            print("Using model: gpt-3.5-turbo")
            self.llm = ChatOpenAI(temperature=temperature)
        elif model == "text-davinci-003":
            print("Using model: text-davinci-003")
            self.llm = OpenAI(temperature=temperature)
        else:
            raise ValueError(f"Unsupported model: {model!r}")

    def create_loader(self, directory: str):
        """Create a document loader for a local directory or an S3 URI."""
        if directory.startswith("s3://"):
            self.loader = S3DirectoryLoader(directory)
        else:
            self.loader = DirectoryLoader(directory, recursive=True)

    def load_or_create_index(self, index_path: str | None):
        """Load a pickled index from disk, or build one from the documents."""
        if index_path is not None and os.path.exists(index_path):
            print("Loading index from disk...")
            # SECURITY NOTE: pickle.load executes arbitrary code from the
            # file — only load index files you created yourself.
            with open(index_path, "rb") as f:
                self.search_index = pickle.load(f)
        else:
            print("Creating index...")
            self.search_index = FAISS.from_documents(
                self.loader.load_and_split(), OpenAIEmbeddings()
            )

    def save_index(self, index_path: str):
        """Pickle the current search index to *index_path*."""
        with open(index_path, "wb") as f:
            pickle.dump(self.search_index, f)

    def print_answer(self, question, k=1):
        """Retrieve and print the answer to *question*.

        Delegates to :meth:`get_answer` so the retrieval/QA logic lives
        in exactly one place.
        """
        print(self.get_answer(question, k=k))

    def get_answer(self, question, k=1):
        """Return the model's answer for *question*.

        Args:
            question: The user's question.
            k: Number of most-similar documents to retrieve as context.
        """
        input_documents = self.search_index.similarity_search(question, k=k)
        return self.chain(
            {
                "input_documents": input_documents,
                "question": question,
            },
            return_only_outputs=True,
        )["output_text"]
|
qnabot/__init__.py
ADDED
|
File without changes
|
setup.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from setuptools import setup, find_packages

# Package metadata for qnabot. Note: classifiers must be string literals —
# the original bare `Development Status :: 4 - Beta` was a SyntaxError.
setup(
    name="qnabot",
    version="0.0.1",
    packages=find_packages(),
    install_requires=[
        # List your package's dependencies here, e.g.,
        # "numpy>=1.18",
    ],
    author="Megaklis Vasilakis",
    author_email="megaklis.vasilakis@gmail.com",
    description="Create a question answering over docs bot with one line of code.",
    # Read the README explicitly as UTF-8 so builds don't depend on the
    # platform's default encoding.
    long_description=open("README.md", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
    url="https://github.com/momegas/qnabot",
    classifiers=[
        # Choose appropriate classifiers from
        # https://pypi.org/classifiers/
        "Development Status :: 4 - Beta",
    ],
)
|