Spaces:
Paused
Paused
| import streamlit as st | |
| import os | |
| from constants import search_number_messages | |
| from langchain_utils import initialize_chat_conversation | |
| from search_indexing import download_and_index_pdf | |
| import re | |
def remove_url(url_to_remove):
    """
    Button callback: drop `url_to_remove` from the URLs kept in the session state.

    Does nothing when the URL is not currently in the list.
    """
    stored_urls = st.session_state.urls
    if url_to_remove not in stored_urls:
        return
    stored_urls.remove(url_to_remove)
# Page title
st.set_page_config(page_title='Talk with PDFs using LLMs - Beta')
st.title('Talk with PDFs using LLMs - (Beta)')

# Default values for every session-state key this script relies on:
#   faiss_index         - cached index plus the URLs it was built from, so the same
#                         PDFs are not downloaded and embedded on every question
#   conversation_memory - conversation object used by Langchain
#   messages            - chat history used by StreamLit (for display purposes)
#   urls                - URLs added by the user in the UI
session_defaults = {
    'faiss_index': {
        'indexed_urls': [],
        'index': None,
    },
    'conversation_memory': None,
    'messages': [],
    'urls': [],
}

# Only fill in keys that are missing, so values from earlier reruns are kept
for state_key, default_value in session_defaults.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = default_value
with st.sidebar:
    # Step 1: the key is typed by the user and masked by the password input
    openai_api_key = st.text_input('Step 1 - OpenAI API Key:', type='password')

    # Add/Remove URLs form (the text box is cleared after each submission)
    with st.form('urls-form', clear_on_submit=True):
        url = st.text_input('Step 2 - URLs to relevant PDFs: ')
        add_url_button = st.form_submit_button('Add')
        if add_url_button:
            # Ignore blank submissions and surrounding whitespace so that an empty
            # string never ends up in the list of PDFs to download
            new_url = url.strip()
            if new_url and new_url not in st.session_state.urls:
                st.session_state.urls.append(new_url)

    # Display a container with the URLs added by the user so far
    with st.container():
        if st.session_state.urls:
            st.header('URLs added:')
            for added_url in st.session_state.urls:
                st.write(added_url)
                # The widget key embeds the URL so that every button is unique
                st.button(
                    label='Remove',
                    key=f"Remove {added_url}",
                    on_click=remove_url,
                    kwargs={'url_to_remove': added_url},
                )
                st.divider()
# Replay the stored chat history, since the whole script runs again on every interaction
for past_message in st.session_state.messages:
    st.chat_message(past_message["role"]).markdown(past_message["content"])
# React to user input
if query_text := st.chat_input("Your message"):
    # Fail early with a readable message instead of exporting an empty key and
    # letting the OpenAI call fail later
    if not openai_api_key:
        st.warning('Please enter your OpenAI API key in the sidebar (Step 1).')
        st.stop()
    # NOTE(review): os.environ is process-wide; if several sessions share one
    # process they also share this value - consider passing the key explicitly
    os.environ['OPENAI_API_KEY'] = openai_api_key

    # Display user message in chat message container, and append to session state
    st.chat_message("user").markdown(query_text)
    st.session_state.messages.append({"role": "user", "content": query_text})

    # Check if FAISS index already exists, or if it needs to be created as it includes new URLs
    session_urls = st.session_state.urls
    index_state = st.session_state['faiss_index']
    needs_reindex = (
        index_state['index'] is None
        or set(index_state['indexed_urls']) != set(session_urls)
    )
    if needs_reindex:
        with st.spinner('Downloading and indexing PDFs...'):
            faiss_index = download_and_index_pdf(session_urls)
        index_state['index'] = faiss_index
        # Store a *copy* of the URL list, and only after indexing succeeded.
        # Storing the list object itself would alias st.session_state.urls, so
        # later additions/removals would change both sides of the comparison
        # above and the index would never be rebuilt.
        index_state['indexed_urls'] = list(session_urls)
        # The cached conversation was created for the previous index; drop it so
        # that a new one is created for the fresh index below.
        # NOTE(review): assumes initialize_chat_conversation binds the conversation
        # to the index it receives - this also resets the Langchain-side memory.
        st.session_state['conversation_memory'] = None
    else:
        faiss_index = index_state['index']

    # Check if conversation memory has already been initialized and is part of the session state
    conversation = st.session_state['conversation_memory']
    if conversation is None:
        conversation = initialize_chat_conversation(faiss_index)
        st.session_state['conversation_memory'] = conversation

    # Search PDF snippets using the last few user messages
    recent_messages = st.session_state.messages[-search_number_messages:]
    user_messages_history = '\n'.join(
        message['content'] for message in recent_messages if message['role'] == 'user'
    )

    with st.spinner('Querying OpenAI GPT...'):
        response = conversation.predict(input=query_text, user_messages_history=user_messages_history)

    # Matches the <START_SNIPPET_PAGE_n> and <END_SNIPPET_PAGE_n> tags; a raw string
    # keeps "\d" from being treated as an (invalid) string escape
    snippet_tag_pattern = re.compile(r"<(?:START|END)_SNIPPET_PAGE_\d+>")

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        st.markdown(response)
        snippet_memory = conversation.memory.memories[1]
        for page_number, snippet in zip(snippet_memory.pages, snippet_memory.snippets):
            # page_number is shown with +1, i.e. it is treated as zero-based here
            with st.expander(f'Snippet from page {page_number + 1}'):
                # Remove the <START> and <END> tags from the snippets before displaying them
                st.markdown(snippet_tag_pattern.sub('', snippet))

    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})