Spaces:
Runtime error
Runtime error
refactor: separate app components (#17) (#18)
Browse files
- app.py +12 -118
- {src → gnosis}/agent.py +1 -1
- gnosis/builder.py +25 -0
- {src → gnosis}/chroma_client.py +0 -0
- gnosis/components/handlers.py +15 -0
- gnosis/components/main.py +56 -0
- gnosis/components/sidebar.py +66 -0
- {src → gnosis}/gui_messages.py +6 -0
- {src → gnosis}/search.py +0 -0
- {src → gnosis}/settings.py +0 -0
app.py
CHANGED
|
@@ -1,139 +1,33 @@
|
|
| 1 |
-
""" A
|
| 2 |
-
import textwrap
|
| 3 |
import os
|
| 4 |
-
import tiktoken
|
| 5 |
-
import fitz
|
| 6 |
import streamlit as st
|
| 7 |
import openai
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
-
from
|
| 10 |
-
|
| 11 |
-
from
|
| 12 |
-
|
| 13 |
-
from
|
| 14 |
-
|
| 15 |
-
from src.agent import PDFExplainer
|
| 16 |
|
| 17 |
|
| 18 |
load_dotenv()
|
| 19 |
|
| 20 |
|
| 21 |
-
def set_api_key():
|
| 22 |
-
"""Set the OpenAI API key."""
|
| 23 |
-
openai.api_key = st.session_state.api_key
|
| 24 |
-
st.session_state.api_message = gm.api_message(openai.api_key)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def click_wk_button():
|
| 28 |
-
"""Set the OpenAI API key."""
|
| 29 |
-
st.session_state.wk_button = not st.session_state.wk_button
|
| 30 |
-
|
| 31 |
-
|
| 32 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 33 |
|
| 34 |
if "api_message" not in st.session_state:
|
| 35 |
st.session_state.api_message = gm.api_message(openai.api_key)
|
| 36 |
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# Build settings
|
| 39 |
chroma_db = ChromaDB(openai.api_key)
|
| 40 |
collection = settings.build(chroma_db)
|
| 41 |
|
| 42 |
# Sidebar
|
| 43 |
-
|
| 44 |
-
st.write("## OpenAI API key")
|
| 45 |
-
openai.api_key = st.text_input(
|
| 46 |
-
"Enter OpenAI API key",
|
| 47 |
-
value="",
|
| 48 |
-
type="password",
|
| 49 |
-
key="api_key",
|
| 50 |
-
placeholder="Enter your OpenAI API key",
|
| 51 |
-
on_change=set_api_key,
|
| 52 |
-
label_visibility="collapsed",
|
| 53 |
-
)
|
| 54 |
-
st.write(
|
| 55 |
-
"You can find your API key at https://platform.openai.com/account/api-keys"
|
| 56 |
-
)
|
| 57 |
-
if "wk_button" not in st.session_state:
|
| 58 |
-
st.session_state.wk_button = False
|
| 59 |
-
|
| 60 |
-
st.checkbox(
|
| 61 |
-
"Use Wikipedia", on_change=click_wk_button, value=st.session_state.wk_button
|
| 62 |
-
)
|
| 63 |
-
st.subheader("Creativity")
|
| 64 |
-
st.write("The higher the value, the crazier the text.")
|
| 65 |
-
st.slider(
|
| 66 |
-
"Temperature",
|
| 67 |
-
min_value=0.0,
|
| 68 |
-
max_value=2.0,
|
| 69 |
-
value=0.9,
|
| 70 |
-
step=0.01,
|
| 71 |
-
key="temperature",
|
| 72 |
-
)
|
| 73 |
-
|
| 74 |
-
if st.button("Delete collection"):
|
| 75 |
-
st.warning("Are you sure?")
|
| 76 |
-
if st.button("Yes"):
|
| 77 |
-
try:
|
| 78 |
-
chroma_db.delete_collection(collection.name)
|
| 79 |
-
except AttributeError:
|
| 80 |
-
st.error("Collection erased.")
|
| 81 |
-
|
| 82 |
-
# Main
|
| 83 |
-
st.title("GnosisPages")
|
| 84 |
-
st.subheader("Create your knowledge base")
|
| 85 |
-
|
| 86 |
-
## Uploader
|
| 87 |
-
|
| 88 |
-
st.write(
|
| 89 |
-
"Upload, extract and consult the content of PDF Files for builiding your knowledge base!"
|
| 90 |
-
)
|
| 91 |
-
pdf = st.file_uploader("Upload a file", type="pdf")
|
| 92 |
-
|
| 93 |
-
if pdf is not None:
|
| 94 |
-
with fitz.open(stream=pdf.read(), filetype="pdf") as doc: # open document
|
| 95 |
-
with st.spinner("Extracting text..."):
|
| 96 |
-
text = chr(12).join([page.get_text() for page in doc])
|
| 97 |
-
st.subheader("Text preview")
|
| 98 |
-
st.write(text[0:300] + "...")
|
| 99 |
-
if st.button("Save chunks"):
|
| 100 |
-
with st.spinner("Saving chunks..."):
|
| 101 |
-
chunks = textwrap.wrap(text, 1250)
|
| 102 |
-
for idx, chunk in enumerate(chunks):
|
| 103 |
-
encoding = tiktoken.get_encoding("cl100k_base")
|
| 104 |
-
num_tokens = len(encoding.encode(chunk))
|
| 105 |
-
collection.add(
|
| 106 |
-
documents=[chunk],
|
| 107 |
-
metadatas=[{"source": pdf.name, "num_tokens": num_tokens}],
|
| 108 |
-
ids=[pdf.name + str(idx)],
|
| 109 |
-
)
|
| 110 |
-
else:
|
| 111 |
-
st.write("Please upload a file of type: pdf")
|
| 112 |
-
|
| 113 |
-
st.subheader("Consult your knowledge base")
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
prompt = st.chat_input()
|
| 117 |
-
|
| 118 |
-
if prompt:
|
| 119 |
-
# Create Agent
|
| 120 |
-
try:
|
| 121 |
-
openai_api_key = openai.api_key
|
| 122 |
-
llm = ChatOpenAI(
|
| 123 |
-
temperature=st.session_state.temperature,
|
| 124 |
-
model="gpt-3.5-turbo-16k",
|
| 125 |
-
api_key=openai.api_key,
|
| 126 |
-
)
|
| 127 |
-
agent = PDFExplainer(
|
| 128 |
-
llm,
|
| 129 |
-
chroma_db,
|
| 130 |
-
extra_tools=st.session_state.wk_button,
|
| 131 |
-
).agent
|
| 132 |
-
except Exception: # pylint: disable=broad-exception-caught
|
| 133 |
-
st.warning("Missing OpenAI API Key.")
|
| 134 |
|
| 135 |
-
|
| 136 |
-
with st.chat_message("assistant"):
|
| 137 |
-
st_callback = StreamlitCallbackHandler(st.container())
|
| 138 |
-
response = agent.run(prompt, callbacks=[st_callback])
|
| 139 |
-
st.write(response)
|
|
|
|
| 1 |
+
""" A Streamlit app for GnosisPages. """
|
|
|
|
| 2 |
import os
|
|
|
|
|
|
|
| 3 |
import streamlit as st
|
| 4 |
import openai
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
+
from gnosis.chroma_client import ChromaDB
|
| 7 |
+
import gnosis.gui_messages as gm
|
| 8 |
+
from gnosis import settings
|
| 9 |
+
from gnosis.components.sidebar import sidebar
|
| 10 |
+
from gnosis.components.main import main
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Start from the key found in the environment (None when it is not defined).
openai.api_key = os.getenv("OPENAI_API_KEY")

# Seed per-session values only once; Streamlit re-executes this script on
# every interaction and these entries must survive the reruns.
if "api_message" not in st.session_state:
    st.session_state.api_message = gm.api_message(openai.api_key)

if "wk_button" not in st.session_state:
    st.session_state.wk_button = False

# Build settings: database client plus the collection the components work on.
chroma_db = ChromaDB(openai.api_key)
collection = settings.build(chroma_db)

# Sidebar
sidebar(chroma_db, collection)

# Main page
main(openai.api_key, chroma_db, collection)
|
|
|
|
|
|
|
|
|
|
|
|
{src → gnosis}/agent.py
RENAMED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""An Langchain Agent that uses ChromaDB as a query tool"""
|
| 2 |
from langchain.agents import AgentType, initialize_agent, load_tools
|
| 3 |
from langchain.tools import Tool
|
| 4 |
-
from
|
| 5 |
|
| 6 |
|
| 7 |
class PDFExplainer:
|
|
|
|
| 1 |
"""An Langchain Agent that uses ChromaDB as a query tool"""
|
| 2 |
from langchain.agents import AgentType, initialize_agent, load_tools
|
| 3 |
from langchain.tools import Tool
|
| 4 |
+
from gnosis.search import Search
|
| 5 |
|
| 6 |
|
| 7 |
class PDFExplainer:
|
gnosis/builder.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Module for building the Langchain Agent"""
|
| 2 |
+
import streamlit as st
|
| 3 |
+
from langchain.chat_models import ChatOpenAI
|
| 4 |
+
from gnosis.agent import PDFExplainer
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def build(key, client):
    """Build the Langchain agent used to answer questions.

    Args:
        key: OpenAI API key handed to ``ChatOpenAI``.
        client: Database client handed to ``PDFExplainer``.

    Returns:
        The ``agent`` attribute of the constructed ``PDFExplainer``, or
        ``None`` if construction failed and the script run was not halted.
    """
    # Bound up front so the final ``return`` can never hit an unbound local
    # when construction fails.
    agent = None
    try:
        llm = ChatOpenAI(
            temperature=st.session_state.temperature,
            model="gpt-3.5-turbo-16k",
            api_key=key,
        )
        agent = PDFExplainer(
            llm,
            client,
            extra_tools=st.session_state.wk_button,
        ).agent
    except Exception:  # pylint: disable=broad-exception-caught
        st.warning("Missing OpenAI API Key.")
        # Halt this script run so callers do not go on to use a missing agent.
        st.stop()

    return agent
|
{src → gnosis}/chroma_client.py
RENAMED
|
File without changes
|
gnosis/components/handlers.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Handler functions for the components"""
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import openai
|
| 4 |
+
import gnosis.gui_messages as gm
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def set_api_key():
    """Apply the key stored under ``api_key`` in the session state.

    The key is assigned to ``openai.api_key`` and the ``api_message`` session
    entry is recomputed from it.
    """
    entered_key = st.session_state.api_key
    openai.api_key = entered_key
    st.session_state.api_message = gm.api_message(entered_key)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def click_wk_button():
    """Flip the boolean ``wk_button`` flag kept in the session state."""
    state = st.session_state
    state.wk_button = not state.wk_button
|
gnosis/components/main.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Main component"""
|
| 2 |
+
import textwrap
|
| 3 |
+
import tiktoken
|
| 4 |
+
import fitz
|
| 5 |
+
import streamlit as st
|
| 6 |
+
from langchain.callbacks import StreamlitCallbackHandler
|
| 7 |
+
import gnosis.gui_messages as gm
|
| 8 |
+
from gnosis.builder import build
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def uploader(collection):
    """Render the PDF uploader and store the uploaded text in ``collection``.

    When a PDF is uploaded its text is extracted page by page, a 300-character
    preview is shown, and pressing "Save chunks" wraps the text into pieces of
    at most 1250 characters and adds each one to ``collection`` together with
    its source file name and token count.

    Args:
        collection: Object exposing ``add(documents=..., metadatas=..., ids=...)``.
    """
    st.write(
        "Upload, extract and consult the content of PDF Files for builiding your knowledge base!"
    )
    pdf = st.file_uploader("Upload a file", type="pdf")

    if pdf is not None:
        with fitz.open(stream=pdf.read(), filetype="pdf") as doc:  # open document
            with st.spinner("Extracting text..."):
                # Pages are joined with a form feed (chr(12)).
                text = chr(12).join([page.get_text() for page in doc])
            st.subheader("Text preview")
            st.write(text[0:300] + "...")
            if st.button("Save chunks"):
                with st.spinner("Saving chunks..."):
                    chunks = textwrap.wrap(text, 1250)
                    # The encoding does not depend on the chunk, so look it
                    # up once instead of on every iteration.
                    encoding = tiktoken.get_encoding("cl100k_base")
                    for idx, chunk in enumerate(chunks):
                        num_tokens = len(encoding.encode(chunk))
                        collection.add(
                            documents=[chunk],
                            metadatas=[{"source": pdf.name, "num_tokens": num_tokens}],
                            ids=[pdf.name + str(idx)],
                        )
    else:
        st.write("Please upload a file of type: pdf")
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def main(key, client, collection):
    """Render the main page: header, uploader and the chat with the agent.

    Args:
        key: OpenAI API key forwarded to ``build``.
        client: Database client forwarded to ``build``.
        collection: Collection forwarded to ``uploader``.
    """
    gm.header()
    uploader(collection)
    st.subheader("Consult your knowledge base")

    question = st.chat_input()
    if not question:
        # Nothing was submitted on this run; there is nothing to answer.
        return

    agent = build(key, client)

    st.chat_message("user").write(question)
    with st.chat_message("assistant"):
        # The callback handler renders into a container of the assistant message.
        handler = StreamlitCallbackHandler(st.container())
        answer = agent.run(question, callbacks=[handler])
        st.write(answer)
|
gnosis/components/sidebar.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Sidebar component for the Streamlit app."""
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import openai
|
| 4 |
+
from gnosis.components.handlers import set_api_key, click_wk_button
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def delete_collection(client, collection):
    """Render a "Delete collection" button with a confirmation step.

    Args:
        client: Object exposing ``delete_collection(name)``.
        collection: Collection whose ``name`` is passed to the client.
    """
    # A button is only True during the single rerun triggered by its click,
    # so a "Yes" button nested under the first button's branch could never be
    # observed as clicked. Remember the pending confirmation across reruns.
    if st.button("Delete collection"):
        st.session_state.confirm_delete = True

    if st.session_state.get("confirm_delete", False):
        st.warning("Are you sure?")
        if st.button("Yes"):
            st.session_state.confirm_delete = False
            try:
                client.delete_collection(collection.name)
            except AttributeError:
                # NOTE(review): presumably raised when ``collection`` has no
                # ``name`` (e.g. it is already gone) - confirm.
                st.error("Collection erased.")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def openai_api_key_box():
    """Render the sidebar box for entering the OpenAI API key.

    A non-empty entry is assigned to ``openai.api_key``; ``set_api_key`` runs
    as the widget's ``on_change`` callback.
    """
    st.sidebar.write("## OpenAI API key")
    entered_key = st.sidebar.text_input(
        "Enter OpenAI API key",
        value="",
        type="password",
        key="api_key",
        placeholder="Enter your OpenAI API key",
        on_change=set_api_key,
        label_visibility="collapsed",
    )
    # An untouched box yields "", which must not overwrite a key that was
    # already configured (for example one loaded from the environment).
    if entered_key:
        openai.api_key = entered_key
    st.sidebar.write(
        "You can find your API key at https://platform.openai.com/account/api-keys"
    )
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def creativity_slider():
    """Render the "Creativity" section with the temperature slider.

    The slider value is stored in the session state under ``temperature``.
    """
    panel = st.sidebar
    panel.subheader("Creativity")
    panel.write("The higher the value, the crazier the text.")

    slider_options = {
        "min_value": 0.0,
        "max_value": 1.25,  # Max level is 2, but it's too stochastic
        "value": 0.5,
        "step": 0.01,
        "key": "temperature",
    }
    panel.slider("Temperature", **slider_options)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def wk_checkbox():
    """Render the "Use Wikipedia" checkbox in the sidebar.

    The checkbox starts from the ``wk_button`` session flag and calls
    ``click_wk_button`` whenever it changes.
    """
    currently_enabled = st.session_state.wk_button
    st.sidebar.checkbox(
        "Use Wikipedia",
        on_change=click_wk_button,
        value=currently_enabled,
    )
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# Sidebar
|
| 57 |
+
def sidebar(client, collection):
    """Render every sidebar widget, top to bottom.

    Args:
        client: Database client forwarded to ``delete_collection``.
        collection: Collection forwarded to ``delete_collection``.
    """
    with st.sidebar:
        # Argument-free sections, in display order.
        for render_section in (openai_api_key_box, wk_checkbox, creativity_slider):
            render_section()

        delete_collection(client, collection)
|
{src → gnosis}/gui_messages.py
RENAMED
|
@@ -2,6 +2,12 @@
|
|
| 2 |
import streamlit as st
|
| 3 |
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
def api_message(api_key):
|
| 6 |
"""Inform if the api key is set."""
|
| 7 |
if api_key is None:
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
|
| 4 |
|
| 5 |
+
def header():
    """Render the page title followed by the knowledge-base subheader."""
    page_title = "GnosisPages"
    page_subtitle = "Create your knowledge base"
    st.title(page_title)
    st.subheader(page_subtitle)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
def api_message(api_key):
|
| 12 |
"""Inform if the api key is set."""
|
| 13 |
if api_key is None:
|
{src → gnosis}/search.py
RENAMED
|
File without changes
|
{src → gnosis}/settings.py
RENAMED
|
File without changes
|