Spaces:

tlarsson
/

psdocuments

Sleeping

File size: 6,718 Bytes


import streamlit as st
# Reset the `em` status marker each time this script executes; its value is
# appended to the "Waiting for system to wake up" message further down.
st.session_state.em = "0"

import os
import json


import requests
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
import re

import os
import numpy as np

# Use the wide page layout.
st.set_page_config(layout="wide")


# Path to the banner image shown at the top of the page
image_path = 'fire.jpg'

# Display the image with st.image
st.image(image_path, caption='', use_column_width=True)


# Snapshot taken BEFORE start2.py runs: the submit handler only answers
# questions when 'docs' was already in the session state at this point.
started = 'docs' in st.session_state

# SECURITY NOTE(review): exec() runs start2.py with full access to this
# module's globals. Names used later in this file but not defined here
# (e.g. `ask`, `escape_markdown`) presumably come from it -- confirm. Only
# ever point this at a trusted, repository-local file.
# The file is read inside a `with` block so the handle is closed promptly.
with open('start2.py') as _start_script:
    _start_source = _start_script.read()
exec(_start_source)


# Copy the key from the 'openkey' variable into the OPENAI_API_KEY variable.
# os.environ only accepts str values, so assigning the None returned by
# os.getenv() for an unset variable would raise TypeError and abort the page.
_openai_key = os.getenv('openkey')
if _openai_key is not None:
    os.environ["OPENAI_API_KEY"] = _openai_key
elif "OPENAI_API_KEY" not in os.environ:
    st.error("OpenAI API key is not configured: set the 'openkey' environment variable.")

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        str: The page texts joined without a separator ('' when the
        document has no pages).

    NOTE(review): ``fitz`` is not imported in the visible part of this file;
    presumably it is brought into scope elsewhere (e.g. by start2.py) --
    confirm.
    """
    doc = fitz.open(pdf_path)
    try:
        # join() instead of repeated `+=` keeps this linear in the text size.
        return "".join(
            doc.load_page(page_num).get_text() for page_num in range(len(doc))
        )
    finally:
        # Always release the document, even when a page fails to load/extract.
        doc.close()

def extract_text_from_pdf2(PDFfile):
    """Return the text of every page of a PDF, extracted with PyPDF2.

    Args:
        PDFfile: Either a filesystem path (str, bytes or os.PathLike) or an
            already opened binary file-like object. Paths are opened and
            closed here; a file-like object is left open for its owner.

    Returns:
        str: The page texts joined without a separator.

    Previously the argument was ignored and the hard-coded file 'pc.pdf' was
    always read; the argument is now honoured.
    """
    # Imported lazily so the rest of the app works without PyPDF2 installed.
    import PyPDF2

    def _read_all(stream):
        # Extract and join the text of all pages readable from *stream*.
        reader = PyPDF2.PdfReader(stream)

        # print the number of pages
        print(len(reader.pages))

        # `or ''` guards against pages for which no text is returned.
        return ''.join(page.extract_text() or '' for page in reader.pages)

    if isinstance(PDFfile, (str, bytes, os.PathLike)):
        # `with` closes the file even if parsing or extraction fails.
        with open(PDFfile, 'rb') as stream:
            return _read_all(stream)

    return _read_all(PDFfile)


def strip_repeated_dots_and_blanks(text):
    """Collapse dot runs, space runs and space-only lines in *text*.

    Three substitutions are applied in order:

    1. Two or more consecutive dots become a single dot.
    2. Two or more consecutive spaces become a single space.
    3. A line consisting of a single space (after step 2) becomes an empty
       line.

    Args:
        text: The string to clean.

    Returns:
        str: The cleaned string.
    """
    # Replace multiple dots with a single dot
    text = re.sub(r'\.{2,}', '.', text)
    # Replace multiple spaces with a single space
    text = re.sub(r' {2,}', ' ', text)
    # Drop the lone space on otherwise empty lines. The trailing newline is
    # matched with a lookahead (not consumed) so that several such lines in a
    # row are all cleaned; consuming it would skip every second one.
    text = re.sub(r'\n (?=\n)', '\n', text)

    return text




# --- Page heading and question form -----------------------------------------
st.title('Peerstreet Question and Answer App')
question = st.text_input("Type your question here:")
submit_button = st.button('Submit')

# --- Usage hints rendered below the form ------------------------------------
_usage_hint = "For best results keep questions simple and to the point and use words that are likely to be found in the documents"
_sample_questions = """ Sample Questions: 
            
* When is the voting deadline?           
* What is the expected recovery for MPDN's?
            
            """
st.markdown(_usage_hint)
st.markdown(_sample_questions)

# --- Result tabs -------------------------------------------------------------
_tab_labels = ["Answer", "Content used to create answer", "Infrmation about this app"]
Answer_tab, Content_tab, Info_tab = st.tabs(_tab_labels)

# One empty slot in each of the first two tabs; they are filled in later once
# a question has been submitted.
answer_placeholder = Answer_tab.empty()
content_placeholder = Content_tab.empty()
# Static disclaimer plus a download of the saved question/answer history.
with Info_tab:
    st.markdown("""## Use at your own risk, accuracy of responses are not guaranteed. 

This app base its anwsers on 110 documents filed by the court. This does not include any scanned documents at this point 
as it takes more work to retrieve the text from them. It does include most orders filed by the court up to Feb 29th.


This is a simple RAG (retrieval augmented generation) system and does not consider order of events when 
retrieving onformation and generating responses. It can also easily missinterpret information, but information used to generate the 
response is presented in the content tab with link to the full document so that you can read the details in its proper context. 
                

""" )

    # results.json is only created once the first answer has been saved, so a
    # missing file is expected on a fresh deployment. Offer an empty JSON list
    # in that case (the save logic also treats a missing file as an empty
    # list) instead of crashing the whole page.
    try:
        with open('results.json', 'r') as file:
            content = file.read()
    except FileNotFoundError:
        content = "[]"

    data_to_download = content.encode()

    # Create a download button ("application/json" is the registered MIME
    # type for JSON; the bare string "json" is not a valid MIME type).
    st.download_button(label="Download Prior responses",
                   data=data_to_download,
                   file_name="results.json",
                   mime="application/json")



# Logic to display an answer when the submit button is pressed

def _append_result(record, file_path='results.json'):
    """Append *record* to the JSON list stored in *file_path*.

    A missing or unparseable file is treated as an empty history. The new
    content is serialized to a string BEFORE the file is opened for writing,
    so a record that cannot be serialized does not leave a truncated file.

    Raises:
        OSError: the file cannot be read or written.
        TypeError, ValueError: *record* is not JSON-serializable.
    """
    try:
        # Read the existing content of the file
        with open(file_path, 'r') as file:
            existing_data = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        # If the file doesn't exist or is empty, start with an empty list
        existing_data = []

    if not isinstance(existing_data, list):
        # Keep whatever valid JSON was stored rather than failing on .append
        existing_data = [existing_data]
    existing_data.append(record)

    serialized = json.dumps(existing_data, indent=4)
    with open(file_path, 'w') as file:
        file.write(serialized)


def _format_sources(selected_items, selected_sources, titles, dates,
                    selected_chunks, highest_simularities, base_url):
    """Build the markdown shown in the content tab, one section per paragraph.

    The sequences are walked in lockstep; each section holds the escaped
    paragraph text followed by a link to the source document (base_url +
    source name), the text block, the relevance score and the date.
    """
    sections = []
    for item, source, title, date, chunk, similarity in zip(
            selected_items, selected_sources, titles, dates,
            selected_chunks, highest_simularities):
        source_line = (
            f" [{title}]({base_url}{source})"
            f"  text block: {chunk}"
            f"   Relevance: {similarity:.2f}"
            f"  Date:{date}"
        )
        sections.append(
            "### Paragraph used. \n" + escape_markdown(item)
            + "\n\n  source:" + source_line + "\n"
        )
    return "".join(sections)


if submit_button:
    if not question:
        answer_placeholder.warning("Please type a question.")
    elif not started:
        # .get() so that a status key that has not been set yet (`ln` is not
        # assigned anywhere in the visible part of this file) cannot turn the
        # waiting message into an error.
        answer_placeholder.markdown(
            "Waiting for system to wake up   "
            + str(st.session_state.get('ln', ''))
            + "    "
            + str(st.session_state.get('em', ''))
        )
    else:
        try:
            (answer, selected_items, selected_sources, titles, dates,
             selected_chunks, highest_simularities) = ask(question)
            answer_placeholder.markdown(escape_markdown(answer))  # Display the answer
        except Exception as e:
            # Top-level UI boundary: show the failure instead of a traceback.
            answer_placeholder.error(f"{type(e).__name__}: {e}")
        else:
            # Saving the history is best-effort. It has its own handler so a
            # storage problem cannot replace the answer displayed above.
            try:
                _append_result({
                    "query": question,
                    "answer": answer,
                    "selected_items": selected_items,
                    "selected_sources": selected_sources,
                    "selected_chunks": selected_chunks,
                    "highest_similarities": [f"{sim:.2f}" for sim in highest_simularities],
                })
            except (OSError, TypeError, ValueError) as e:
                st.warning(f"The response could not be saved to results.json: {e}")

            # Rendering problems are reported in the content tab only, for the
            # same reason.
            try:
                content_placeholder.markdown(_format_sources(
                    selected_items, selected_sources, titles, dates,
                    selected_chunks, highest_simularities,
                    'https://cases.stretto.com/public/x247/12208/PLEADINGS/',
                ))
            except Exception as e:
                content_placeholder.error(f"{type(e).__name__}: {e}")



    
#if 'retriever' not in st.session_state:
#    st.session_state.em = "mm"

#if 'retriever' not in st.session_state:
#    st.session_state.em = "1"
#    exec(open('start.py').read())
#    st.session_state.em = "2"