# Streamlit front end for a retrieval-augmented question-answering app over
# court filings.
#
# The retrieval/LLM pipeline is not defined in this file: it is loaded by
# executing start2.py inside this module's namespace (see below).  The names
# `ask`, `escape_markdown` and `fitz` used further down are presumably
# provided by that script -- TODO confirm.

import json
import os
import re

import numpy as np
import requests
import streamlit as st
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Status marker shown in the "waiting" message further down.
st.session_state.em = "0"

st.set_page_config(layout="wide")

# Banner image at the top of the page.
image_path = 'fire.jpg'
st.image(image_path, caption='', use_column_width=True)

# Evaluated BEFORE start2.py runs, so it reflects the session state left by
# a previous rerun, not whatever start2.py sets during this one.
started = 'docs' in st.session_state

# SECURITY NOTE: exec() runs start2.py with full access to this module's
# globals.  Acceptable only because the file ships with the app; never point
# this at user-supplied content.
with open('start2.py') as _startup_file:
    exec(_startup_file.read())

# Copy the deployment secret into the variable the OpenAI client reads.
# os.environ rejects None, so only assign when the secret is actually set.
_openai_key = os.getenv('openkey')
if _openai_key:
    os.environ["OPENAI_API_KEY"] = _openai_key

# JSON file holding the history of questions and answers.
_RESULTS_PATH = 'results.json'
# Base URL that source file names are appended to when linking documents.
_PLEADINGS_URL = 'https://cases.stretto.com/public/x247/12208/PLEADINGS/'


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Uses ``fitz``, which is not imported in this file and must already be
    available in the module namespace when this function is called.
    """
    doc = fitz.open(pdf_path)
    try:
        return "".join(
            doc.load_page(page_num).get_text() for page_num in range(len(doc))
        )
    finally:
        # Close the document even if text extraction fails part-way.
        doc.close()


def extract_text_from_pdf2(PDFfile):
    """Return the concatenated text of every page of a PDF, using PyPDF2.

    Args:
        PDFfile: Path of the PDF file to read.  (Earlier revisions ignored
            this argument and always read ``pc.pdf``.)
    """
    import PyPDF2

    with open(PDFfile, 'rb') as pdf_stream:
        reader = PyPDF2.PdfReader(pdf_stream)
        # Guard against extract_text() yielding nothing for a page.
        return "".join((page.extract_text() or "") for page in reader.pages)


def strip_repeated_dots_and_blanks(text):
    """Collapse runs of dots and of spaces, and blank-ish line separators.

    Two or more consecutive dots become one dot, two or more consecutive
    spaces become one space, and a newline-space-newline sequence becomes
    an empty line.
    """
    text = re.sub(r'\.{2,}', '.', text)
    text = re.sub(r' {2,}', ' ', text)
    text = re.sub('\n \n', '\n\n', text)
    return text


def _append_result(path, record):
    """Append *record* to the JSON list stored at *path*.

    A missing or unparsable file is treated as an empty history.  The new
    content is serialized before the file is opened for writing, so a
    serialization error cannot leave a truncated file behind.
    """
    try:
        with open(path, 'r') as history_file:
            history = json.load(history_file)
    except (FileNotFoundError, json.JSONDecodeError):
        history = []
    history.append(record)
    serialized = json.dumps(history, indent=4)
    with open(path, 'w') as history_file:
        history_file.write(serialized)


def _format_sources(items, sources, titles, dates, chunks, similarities):
    """Build the markdown shown in the content tab, one section per passage."""
    sections = []
    for item, source, title, date, chunk, similarity in zip(
        items, sources, titles, dates, chunks, similarities
    ):
        link = (
            f" [{title}]({_PLEADINGS_URL}{source})"
            f" text block: {chunk}"
            f" Relevance: {similarity:.2f}"
            f" Date:{date}"
        )
        sections.append(
            f"### Paragraph used. \n{escape_markdown(item)}\n\n source:{link}\n"
        )
    return "".join(sections)


# Title of the page
st.title('Peerstreet Question and Answer App')

# Text input for the question
question = st.text_input("Type your question here:")

# A button to submit the question
submit_button = st.button('Submit')

st.markdown("For best results keep questions simple and to the point and use words that are likely to be found in the documents")
st.markdown("""
Sample Questions:
* When is the voting deadline?
* What is the expected recovery for MPDN's?
""")

# NOTE(review): several user-facing strings below contain typos
# ("Infrmation", "anwsers", "onformation", "missinterpret"); they are left
# unchanged here and should be corrected in a copy pass.
Answer_tab, Content_tab, Info_tab = st.tabs(["Answer", "Content used to create answer", "Infrmation about this app"])

# Placeholders that are filled in once a question has been answered.
with Answer_tab:
    answer_placeholder = st.empty()

with Content_tab:
    content_placeholder = st.empty()

with Info_tab:
    st.markdown("""## Use at your own risk, accuracy of responses are not guaranteed.

This app base its anwsers on 110 documents filed by the court. This does not include any scanned documents at this point as it takes more work to retrieve the text from them. It does include most orders filed by the court up to Feb 29th.

This is a simple RAG (retrieval augmented generation) system and does not consider order of events when retrieving onformation and generating responses. It can also easily missinterpret information, but information used to generate the response is presented in the content tab with link to the full document so that you can read the details in its proper context.
"""
    )

# Download button for the saved history.
# NOTE(review): the original indentation was lost, so it is unclear whether
# this button belonged inside the Info tab; it is rendered at page level.
try:
    with open(_RESULTS_PATH, 'rb') as results_file:
        data_to_download = results_file.read()
except FileNotFoundError:
    # No question has been answered yet: offer an empty history instead of
    # crashing the page.
    data_to_download = b"[]"

st.download_button(label="Download Prior responses",
                   data=data_to_download,
                   file_name="results.json",
                   mime="application/json")

# Answer the question when the submit button is pressed.
if submit_button:
    if not question:
        answer_placeholder.warning("Please type a question.")
    elif not started:
        # `ln` may not have been set yet, so read both markers defensively.
        _ln = st.session_state.get("ln", "")
        _em = st.session_state.get("em", "")
        answer_placeholder.markdown(f"Waiting for system to wake up {_ln} {_em}")
    else:
        try:
            (answer, selected_items, selected_sources, titles, dates,
             selected_chunks, highest_simularities) = ask(question)
            answer_placeholder.markdown(escape_markdown(answer))
        except Exception as e:
            # Top-level UI boundary: show the failure instead of a traceback.
            answer_placeholder.error(f"{type(e).__name__}: {e}")
        else:
            # Saving the history and rendering the sources are handled
            # separately so a failure in either cannot replace the answer
            # that is already on screen.
            try:
                _append_result(_RESULTS_PATH, {
                    "query": question,
                    "answer": answer,
                    "selected_items": selected_items,
                    "selected_sources": selected_sources,
                    "selected_chunks": selected_chunks,
                    "highest_similarities": [f"{sim:.2f}" for sim in highest_simularities],
                })
            except (OSError, TypeError, ValueError) as e:
                st.warning(f"Could not save this response to {_RESULTS_PATH}: {e}")

            try:
                content_placeholder.markdown(_format_sources(
                    selected_items, selected_sources, titles, dates,
                    selected_chunks, highest_simularities,
                ))
            except Exception as e:
                content_placeholder.error(f"{type(e).__name__}: {e}")