Spaces:
Runtime error
feature added to get data from url
app.py CHANGED
@@ -12,11 +12,13 @@ from loguru import logger
 from config import Config
 from utils import create_vectordb
 from utils import get_qa_chain
+from utils import load_file
+from utils import load_url
+from utils import save_file_locally
 load_dotenv()
 
 openai.api_key = os.environ['OPENAI_API_KEY']
 
-
 if 'messages' not in st.session_state:
     st.session_state.messages = []
 
@@ -25,34 +27,47 @@ for message in st.session_state.messages:
     st.markdown(message['content'])
 
 uploaded_file = st.sidebar.file_uploader('Upload a file', type=['pdf', 'txt'])
+doc_url = st.sidebar.text_input('Or enter a URL to a document')
+
+if uploaded_file is not None and doc_url != '':
+    st.sidebar.error('Please choose one or the other')
+    st.stop()
 
 
 def set_status():
     if uploaded_file is None:
-        Path(Config.vectorstore_path).unlink(missing_ok=True)
+        # Path(Config.vectorstore_path).unlink(missing_ok=True)
         st.sidebar.info('Upload a file to start a conversation')
     else:
         st.sidebar.info(f"Let's talk to {Path(uploaded_file.name)}")
 
 
-def
+def process_data(data, data_type):
     if 'context' not in st.session_state:
-
-
-
+        if data_type == 'file':
+            upath = f'docs/{uploaded_file.name}'
+            save_file_locally(data, upath)
+            load_file(upath)
+        else:
+            load_url(data)
+        st.session_state['context'] = True
 
-        with open(upath, 'wb') as hndl:
-            hndl.write(uploaded_file.getbuffer())
 
-
+def process_uploaded_doc():
+    if 'context' not in st.session_state:
+        loader = Uns
         st.session_state['context'] = True
 
 
 set_status()
 
 
-if uploaded_file is not None:
-
+if uploaded_file is not None or doc_url != '':
+    if uploaded_file is not None:
+        process_data(uploaded_file, data_type='file')
+    else:
+        process_data(doc_url, data_type='url')
+
     qr_chain = get_qa_chain()
 
 if prompt := st.chat_input('Send a message'):
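A note on the new app.py flow: `process_data` runs on every Streamlit rerun, but the `'context'` key in `st.session_state` makes ingestion a one-time cost per session. Because that flag is never cleared, though, switching to a different file or URL mid-session will not rebuild the index (the old cleanup via `Path(Config.vectorstore_path).unlink` is now commented out). A minimal sketch, not part of the commit, of keying the cache on the active source instead; `source_id` and the session key are hypothetical names for illustration:

def process_data(data, data_type):
    # Identify the active source: file name for uploads, the URL otherwise.
    source_id = data.name if data_type == 'file' else data
    # Re-ingest only when the source changes, rather than only once per
    # session, so a second document replaces the first one's index.
    if st.session_state.get('source_id') != source_id:
        ...  # save/load the file or fetch the URL, then build the vector store
        st.session_state['source_id'] = source_id

The commit also leaves an unfinished `process_uploaded_doc` stub behind; its `loader = Uns` line is truncated in the source, but the function is never called, so it is dead code rather than a crash.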
utils.py CHANGED
@@ -7,6 +7,7 @@ from dotenv import load_dotenv
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import UnstructuredFileLoader
+from langchain.document_loaders import UnstructuredURLLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.memory import ConversationBufferMemory
 from langchain.prompts import PromptTemplate
@@ -29,19 +30,62 @@ def get_prompt():
     """
     This function creates a prompt template that will be used to generate the prompt for the model.
     """
-    template = """
+    template = """Only use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. If you find an answer, explain the reasoning behind it. Don't make up new terms which are not available in the context.
     ---
     Context: {context}
     Question: {question}
     Answer:"""
     qa_prompt = PromptTemplate(
         template=template, input_variables=[
-        'question', 'context',
+            'question', 'context',
         ],
     )
     return qa_prompt
 
 
+# def process_data():
+
+
+def save_file_locally(uploaded_file, dest):
+    with open(dest, 'wb') as hndl:
+        hndl.write(uploaded_file.getbuffer())
+
+
+def get_text_splitter():
+    """
+    This function creates a text splitter.
+    """
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=Config.chunk_size,
+        chunk_overlap=Config.chunk_overlap,
+        length_function=len,
+    )
+    return text_splitter
+
+
+def create_vectorstore(data):
+    text_splitter = get_text_splitter()
+    documents = text_splitter.split_documents(data)
+    embeddings = OpenAIEmbeddings()
+    vectorstore = FAISS.from_documents(documents, embeddings)
+    with open(Config.vectorstore_path, 'wb') as f:
+        pickle.dump(vectorstore, f)
+
+
+def load_url(url):
+    loader = UnstructuredURLLoader(urls=[url])
+    data = loader.load()
+    import pdb
+    pdb.set_trace()
+    create_vectorstore(data)
+
+
+def load_file(file):
+    loader = UnstructuredFileLoader(file)
+    data = loader.load()
+    create_vectorstore(data)
+
+
 def create_vectordb(file_path):
     """
     This function creates a vectorstore from a file.
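The filled-in template constrains the model to the retrieved context; `get_prompt` wires it into a `PromptTemplate` over `question` and `context`. A quick way to see what the chain actually receives, using the standard `PromptTemplate.format` call; the sample values are placeholders:

# Render the prompt with dummy values to inspect the final text.
qa_prompt = get_prompt()
print(qa_prompt.format(
    context='FAISS is a library for efficient similarity search.',
    question='What is FAISS?',
))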
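One hazard worth flagging: `load_url` still carries an `import pdb` / `pdb.set_trace()` pair left over from debugging. A deployed Space has no interactive console attached, so the first URL ingestion will hang at the breakpoint. A cleaned-up sketch with the same behavior as the committed version, minus the breakpoint:

from langchain.document_loaders import UnstructuredURLLoader

def load_url(url):
    # Fetch and parse the page, then index it; no debugger stop.
    loader = UnstructuredURLLoader(urls=[url])
    data = loader.load()
    create_vectorstore(data)  # helper added in this same commit

Separately, `create_vectorstore` persists the FAISS store with `pickle.dump`; LangChain's FAISS wrapper also offers `save_local`/`load_local`, which sidesteps pickle compatibility issues across versions.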