Spaces:
Runtime error
Runtime error
Commit Β·
56724c7
0
Parent(s):
Initial commit
Browse files- .gitignore +1 -0
- README.md +45 -0
- qnabot/QnABot.py +85 -0
- qnabot/__init__.py +0 -0
- setup.py +23 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.venv
|
README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# QnA Bot
|
| 2 |
+
|
| 3 |
+
Create a question answering over docs bot with one line of code:
|
| 4 |
+
|
| 5 |
+
```python
|
| 6 |
+
from qnabot import QnABot
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
os.environ["OPENAI_API_KEY"] = "my key"
|
| 10 |
+
|
| 11 |
+
bot = QnABot(directory="./mydata")
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
### Here's how it works
|
| 15 |
+
|
| 16 |
+
High-level overview of what is happening under the hood:
|
| 17 |
+
|
| 18 |
+
```mermaid
|
| 19 |
+
sequenceDiagram
|
| 20 |
+
actor User
|
| 21 |
+
participant API
|
| 22 |
+
participant LLM
|
| 23 |
+
participant Vectorstore
|
| 24 |
+
participant IngestionEngine
|
| 25 |
+
participant DataLake
|
| 26 |
+
autonumber
|
| 27 |
+
|
| 28 |
+
Note over API, DataLake: Ingestion phase
|
| 29 |
+
loop Every X time
|
| 30 |
+
IngestionEngine ->> DataLake: Load documents
|
| 31 |
+
DataLake -->> IngestionEngine: Return data
|
| 32 |
+
IngestionEngine -->> IngestionEngine: Split documents and Create embeddings
|
| 33 |
+
IngestionEngine ->> Vectorstore: Store documents and embeddings
|
| 34 |
+
end
|
| 35 |
+
|
| 36 |
+
Note over API, DataLake: Generation phase
|
| 37 |
+
|
| 38 |
+
User ->> API: Send user question
|
| 39 |
+
API ->> Vectorstore: Lookup documents in the index relevant to the question
|
| 40 |
+
API ->> API: Construct a prompt from the question and any relevant documents
|
| 41 |
+
API ->> LLM: Pass the prompt to the model
|
| 42 |
+
LLM -->> API: Get response from model
|
| 43 |
+
API -->> User: Return response
|
| 44 |
+
|
| 45 |
+
```
|
qnabot/QnABot.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Import necessary libraries and modules
|
| 2 |
+
from langchain.llms import OpenAI
|
| 3 |
+
from langchain.chat_models import ChatOpenAI
|
| 4 |
+
from langchain.embeddings import OpenAIEmbeddings
|
| 5 |
+
from langchain.document_loaders import DirectoryLoader, S3DirectoryLoader
|
| 6 |
+
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
| 7 |
+
from langchain.vectorstores.faiss import FAISS
|
| 8 |
+
import pickle
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class QnABot:
    """Question-answering bot over a directory of documents.

    Builds (or loads from disk) a FAISS vector index over the documents
    found in *directory*, then answers questions by retrieving the most
    similar documents and passing them with the question to an OpenAI model
    via a question-answering-with-sources chain.
    """

    def __init__(
        self,
        directory: str,
        index: str | None = None,
        model: str | None = None,
        temperature: float = 0,
    ):
        """Initialize the bot.

        Args:
            directory: Local path or ``s3://...`` URI holding the documents.
            index: Optional path to a pickled FAISS index. If the file
                exists it is loaded; otherwise a new index is built from
                the documents in *directory*.
            model: ``"gpt-3.5-turbo"`` (default when ``None``) or
                ``"text-davinci-003"``.
            temperature: Sampling temperature forwarded to the LLM.

        Raises:
            ValueError: If *model* is not one of the supported names.
        """
        # Select a model, create a document loader, and load or build
        # the vector index before wiring up the QA chain.
        self.select_model(model, temperature)
        self.create_loader(directory)
        self.load_or_create_index(index)

        # Load the question-answering (with sources) chain for the model.
        self.chain = load_qa_with_sources_chain(self.llm)

    def select_model(self, model: str | None, temperature: float):
        """Set ``self.llm`` according to the requested model name.

        Raises:
            ValueError: For unsupported model names. Failing fast here is
                clearer than leaving ``self.llm`` unset, which would only
                surface later as an AttributeError in ``__init__``.
        """
        if model is None or model == "gpt-3.5-turbo":
            print("Using model: gpt-3.5-turbo")
            self.llm = ChatOpenAI(temperature=temperature)
        elif model == "text-davinci-003":
            print("Using model: text-davinci-003")
            self.llm = OpenAI(temperature=temperature)
        else:
            raise ValueError(f"Unsupported model: {model!r}")

    def create_loader(self, directory: str):
        """Create a document loader for a local directory or an S3 URI."""
        if directory.startswith("s3://"):
            self.loader = S3DirectoryLoader(directory)
        else:
            self.loader = DirectoryLoader(directory, recursive=True)

    def load_or_create_index(self, index_path: str | None):
        """Load a pickled index from disk, or build one from the documents."""
        if index_path is not None and os.path.exists(index_path):
            print("Loading index from disk...")
            # SECURITY NOTE: pickle.load executes arbitrary code from the
            # file — only load index files you created yourself.
            with open(index_path, "rb") as f:
                self.search_index = pickle.load(f)
        else:
            print("Creating index...")
            self.search_index = FAISS.from_documents(
                self.loader.load_and_split(), OpenAIEmbeddings()
            )

    def save_index(self, index_path: str):
        """Pickle the current search index to *index_path*."""
        with open(index_path, "wb") as f:
            pickle.dump(self.search_index, f)

    def print_answer(self, question, k=1):
        """Retrieve and print the answer to *question*.

        Delegates to :meth:`get_answer` so the retrieval/QA logic lives
        in exactly one place.
        """
        print(self.get_answer(question, k=k))

    def get_answer(self, question, k=1):
        """Return the model's answer for *question*.

        Args:
            question: The user's question.
            k: Number of most-similar documents to retrieve as context.
        """
        input_documents = self.search_index.similarity_search(question, k=k)
        return self.chain(
            {
                "input_documents": input_documents,
                "question": question,
            },
            return_only_outputs=True,
        )["output_text"]
|
qnabot/__init__.py
ADDED
|
File without changes
|
setup.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from setuptools import setup, find_packages

# Package metadata for qnabot. Note: classifiers must be string literals —
# the original bare `Development Status :: 4 - Beta` was a SyntaxError.
setup(
    name="qnabot",
    version="0.0.1",
    packages=find_packages(),
    install_requires=[
        # List your package's dependencies here, e.g.,
        # "numpy>=1.18",
    ],
    author="Megaklis Vasilakis",
    author_email="megaklis.vasilakis@gmail.com",
    description="Create a question answering over docs bot with one line of code.",
    # Read the README explicitly as UTF-8 so builds don't depend on the
    # platform's default encoding.
    long_description=open("README.md", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
    url="https://github.com/momegas/qnabot",
    classifiers=[
        # Choose appropriate classifiers from
        # https://pypi.org/classifiers/
        "Development Status :: 4 - Beta",
    ],
)
|