Spaces:
Runtime error
feature added to get data from url
app.py CHANGED
@@ -12,11 +12,13 @@ from loguru import logger
 from config import Config
 from utils import create_vectordb
 from utils import get_qa_chain
+from utils import load_file
+from utils import load_url
+from utils import save_file_locally
 load_dotenv()
 
 openai.api_key = os.environ['OPENAI_API_KEY']
 
-
 if 'messages' not in st.session_state:
     st.session_state.messages = []
 
@@ -25,34 +27,47 @@ for message in st.session_state.messages:
     st.markdown(message['content'])
 
 uploaded_file = st.sidebar.file_uploader('Upload a file', type=['pdf', 'txt'])
+doc_url = st.sidebar.text_input('Or enter a URL to a document')
+
+if uploaded_file is not None and doc_url != '':
+    st.sidebar.error('Please choose one or the other')
+    st.stop()
 
 
 def set_status():
     if uploaded_file is None:
-        Path(Config.vectorstore_path).unlink(missing_ok=True)
+        # Path(Config.vectorstore_path).unlink(missing_ok=True)
         st.sidebar.info('Upload a file to start a conversation')
     else:
         st.sidebar.info(f"Let's talk to {Path(uploaded_file.name)}")
 
 
-def
+def process_data(data, data_type):
     if 'context' not in st.session_state:
-
-
-
+        if data_type == 'file':
+            upath = f'docs/{uploaded_file.name}'
+            save_file_locally(data, upath)
+            load_file(upath)
+        else:
+            load_url(data)
+        st.session_state['context'] = True
 
-        with open(upath, 'wb') as hndl:
-            hndl.write(uploaded_file.getbuffer())
 
-
+def process_uploaded_doc():
+    if 'context' not in st.session_state:
+        loader = Uns
         st.session_state['context'] = True
 
 
 set_status()
 
 
-if uploaded_file is not None:
-
+if uploaded_file is not None or doc_url != '':
+    if uploaded_file is not None:
+        process_data(uploaded_file, data_type='file')
+    else:
+        process_data(doc_url, data_type='url')
+
     qr_chain = get_qa_chain()
 
 if prompt := st.chat_input('Send a message'):
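A note on the new app.py flow: `process_data` runs on every Streamlit rerun, but the `'context'` key in `st.session_state` makes ingestion a one-time cost per session. Because that flag is never cleared, though, switching to a different file or URL mid-session will not rebuild the index (the old cleanup via `Path(Config.vectorstore_path).unlink` is now commented out). A minimal sketch, not part of the commit, of keying the cache on the active source instead; `source_id` and the session key are hypothetical names for illustration:

def process_data(data, data_type):
    # Identify the active source: file name for uploads, the URL otherwise.
    source_id = data.name if data_type == 'file' else data
    # Re-ingest only when the source changes, rather than only once per
    # session, so a second document replaces the first one's index.
    if st.session_state.get('source_id') != source_id:
        ...  # save/load the file or fetch the URL, then build the vector store
        st.session_state['source_id'] = source_id

The commit also leaves an unfinished `process_uploaded_doc` stub behind; its `loader = Uns` line is truncated in the source, but the function is never called, so it is dead code rather than a crash.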
utils.py CHANGED
@@ -7,6 +7,7 @@ from dotenv import load_dotenv
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import UnstructuredFileLoader
+from langchain.document_loaders import UnstructuredURLLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.memory import ConversationBufferMemory
 from langchain.prompts import PromptTemplate
@@ -29,19 +30,62 @@ def get_prompt():
     """
     This function creates a prompt template that will be used to generate the prompt for the model.
     """
-    template = """
+    template = """Only use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. If you find an answer, explain the reasoning behind it. Don't make up new terms which are not available in the context.
     ---
     Context: {context}
     Question: {question}
     Answer:"""
     qa_prompt = PromptTemplate(
         template=template, input_variables=[
-        'question', 'context',
+            'question', 'context',
         ],
     )
     return qa_prompt
 
 
+# def process_data():
+
+
+def save_file_locally(uploaded_file, dest):
+    with open(dest, 'wb') as hndl:
+        hndl.write(uploaded_file.getbuffer())
+
+
+def get_text_splitter():
+    """
+    This function creates a text splitter.
+    """
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=Config.chunk_size,
+        chunk_overlap=Config.chunk_overlap,
+        length_function=len,
+    )
+    return text_splitter
+
+
+def create_vectorstore(data):
+    text_splitter = get_text_splitter()
+    documents = text_splitter.split_documents(data)
+    embeddings = OpenAIEmbeddings()
+    vectorstore = FAISS.from_documents(documents, embeddings)
+    with open(Config.vectorstore_path, 'wb') as f:
+        pickle.dump(vectorstore, f)
+
+
+def load_url(url):
+    loader = UnstructuredURLLoader(urls=[url])
+    data = loader.load()
+    import pdb
+    pdb.set_trace()
+    create_vectorstore(data)
+
+
+def load_file(file):
+    loader = UnstructuredFileLoader(file)
+    data = loader.load()
+    create_vectorstore(data)
+
+
 def create_vectordb(file_path):
     """
     This function creates a vectorstore from a file.
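The filled-in template constrains the model to the retrieved context; `get_prompt` wires it into a `PromptTemplate` over `question` and `context`. A quick way to see what the chain actually receives, using the standard `PromptTemplate.format` call; the sample values are placeholders:

# Render the prompt with dummy values to inspect the final text.
qa_prompt = get_prompt()
print(qa_prompt.format(
    context='FAISS is a library for efficient similarity search.',
    question='What is FAISS?',
))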
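One hazard worth flagging: `load_url` still carries an `import pdb` / `pdb.set_trace()` pair left over from debugging. A deployed Space has no interactive console attached, so the first URL ingestion will hang at the breakpoint. A cleaned-up sketch with the same behavior as the committed version, minus the breakpoint:

from langchain.document_loaders import UnstructuredURLLoader

def load_url(url):
    # Fetch and parse the page, then index it; no debugger stop.
    loader = UnstructuredURLLoader(urls=[url])
    data = loader.load()
    create_vectorstore(data)  # helper added in this same commit

Separately, `create_vectorstore` persists the FAISS store with `pickle.dump`; LangChain's FAISS wrapper also offers `save_local`/`load_local`, which sidesteps pickle compatibility issues across versions.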