File size: 6,718 Bytes
65a8672
 
b8685e9
65a8672
 
cb76759
65a8672
 
 
 
 
 
 
 
 
 
474d141
65a8672
dd35685
cb76759
 
93b552b
cb76759
 
dd35685
 
cb76759
 
a942a76
dd35685
65a8672
523ef04
65a8672
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb76759
65a8672
 
 
 
 
 
 
52cc340
 
 
 
 
 
 
cb76759
 
fdf8a49
cb76759
 
65a8672
fdf8a49
cb76759
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1acb91b
cb76759
65a8672
1acb91b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65a8672
 
 
 
474d141
 
cb76759
 
52cc340
1acb91b
cb76759
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52cc340
1acb91b
cb76759
1acb91b
cb76759
 
 
 
 
 
474d141
cb76759
49a6cc6
cb76759
59f8385
65a8672
 
2efa2b1
 
 
a942a76
cb76759
 
c471578
cb76759
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229

import streamlit as st
# Status marker that is appended to the "Waiting for system to wake up"
# message further down; it is assigned unconditionally each time this
# script executes.
st.session_state.em = "0"

import os
import json


import requests
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
import re

import os
import numpy as np

# Configure the page layout before any element is rendered.
st.set_page_config(layout="wide")


# Path to the image
image_path = 'fire.jpg'

# Display the image with st.image
st.image(image_path, caption='', use_column_width=True)


# Record whether 'docs' was already present in the session state *before*
# start2.py runs; the submit handler below only answers questions when it was.
started = 'docs' in st.session_state

# Run start2.py inside this module's namespace. Names used later in this file
# but not defined here (e.g. `ask`, `escape_markdown`) are presumably provided
# by it -- TODO confirm.
# NOTE(review): exec() runs whatever the file contains; keep start2.py under
# version control and never load it from an untrusted location.
with open('start2.py') as start_script:  # close the handle deterministically
    exec(start_script.read())


# Mirror the deployment variable 'openkey' into the name the OpenAI client
# reads. Assigning None to os.environ raises TypeError, so only copy the value
# when it is actually set; an OPENAI_API_KEY that is already defined is left
# alone in that case.
openai_key = os.getenv('openkey')
if openai_key is not None:
    os.environ["OPENAI_API_KEY"] = openai_key

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: Location of the PDF, passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages, concatenated in page order with no separator
        added between pages.

    Note:
        ``fitz`` is not imported in this file; it must already be available
        in the module namespace when this function is called.
    """
    doc = fitz.open(pdf_path)
    try:
        # Collect the per-page text and join once instead of growing a string
        # with repeated ``+=``.
        return "".join(
            doc.load_page(page_num).get_text()
            for page_num in range(len(doc))
        )
    finally:
        # Release the document even if loading or extracting a page fails.
        doc.close()

def extract_text_from_pdf2(PDFfile):
    """Return the text of every page of a PDF using PyPDF2.

    The previous implementation ignored its argument and always read the
    hard-coded file ``'pc.pdf'``; the argument is now honoured.

    Args:
        PDFfile: Either a filesystem path (``str``, ``bytes`` or
            ``os.PathLike``) or an already opened binary file object.
            A path is opened and closed here; a file object passed by the
            caller is left open for the caller to close.

    Returns:
        The text of all pages, concatenated in page order.
    """
    import PyPDF2

    def read_all_pages(stream):
        # Read every page from an open binary stream.
        reader = PyPDF2.PdfReader(stream)

        # print the number of pages
        print(len(reader.pages))

        # Treat a page without extractable text as empty instead of failing
        # on ``str + None``.
        return ''.join(page.extract_text() or '' for page in reader.pages)

    if isinstance(PDFfile, (str, bytes, os.PathLike)):
        # ``with`` closes the file even when parsing raises.
        with open(PDFfile, 'rb') as stream:
            return read_all_pages(stream)

    return read_all_pages(PDFfile)


def strip_repeated_dots_and_blanks(text):
    """Collapse runs of dots and spaces and tidy blank lines.

    The substitutions are applied in this order:

    1. two or more consecutive dots become a single dot,
    2. two or more consecutive spaces become a single space,
    3. a line holding exactly one space (newline, space, newline) becomes an
       empty line.

    Args:
        text: The string to clean up.

    Returns:
        The cleaned string.
    """
    substitutions = (
        (r'\.{2,}', '.'),
        (r' {2,}', ' '),
        ('\n \n', '\n\n'),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text




# Title of the page
st.title('Peerstreet Question and Answer App')

# Text input for the question
question = st.text_input("Type your question here:")

# A button to submit the question
submit_button = st.button('Submit')

st.markdown("For best results keep questions simple and to the point and use words that are likely to be found in the documents")
st.markdown(""" Sample Questions: 
            
* When is the voting deadline?           
* What is the expected recovery for MPDN's?
            
            """)

# Create tabs
Answer_tab, Content_tab, Info_tab = st.tabs(["Answer", "Content used to create answer", "Information about this app"])


# Placeholders that the submit handler fills in later
with Answer_tab:
    answer_placeholder = st.empty()
with Content_tab:
    content_placeholder = st.empty()
with Info_tab:
    st.markdown("""## Use at your own risk, accuracy of responses are not guaranteed. 

This app bases its answers on 110 documents filed by the court. This does not include any scanned documents at this point 
as it takes more work to retrieve the text from them. It does include most orders filed by the court up to Feb 29th.


This is a simple RAG (retrieval augmented generation) system and does not consider order of events when 
retrieving information and generating responses. It can also easily misinterpret information, but information used to generate the 
response is presented in the content tab with link to the full document so that you can read the details in its proper context. 
                

""" )

    # results.json is only created once the first answer has been saved, so
    # a missing file must not crash the page. Offer an empty JSON list then,
    # which is the same starting value the save logic uses.
    try:
        with open('results.json', 'r') as file:
            content = file.read()
    except FileNotFoundError:
        content = "[]"

    data_to_download = content.encode()

    # Create a download button; "application/json" is the registered MIME
    # type for JSON payloads.
    st.download_button(label="Download Prior responses",
                       data=data_to_download,
                       file_name="results.json",
                       mime="application/json")



# Logic to display an answer when the submit button is pressed
if submit_button:
    if question:  # Check if there is a question typed
        try:
            if started:
                # `ask` is not defined in this file. It is unpacked into seven
                # values here; the last six are treated as parallel lists
                # below -- TODO confirm against its definition.
                (answer, selected_items, selected_sources, titles, dates,
                 selected_chunks, highest_simularities) = ask(question)
                answer_placeholder.markdown(escape_markdown(answer))  # Display the answer

                # Prepare the data to be saved
                data_to_save = {
                    "query": question,
                    "answer": answer,
                    "selected_items": selected_items,
                    "selected_sources": selected_sources,
                    "selected_chunks": selected_chunks,
                    "highest_similarities": [f"{sim:.2f}" for sim in highest_simularities]
                }

                # The file to which the data will be appended
                file_path = 'results.json'

                try:
                    # Read the existing content of the file
                    with open(file_path, 'r') as file:
                        existing_data = json.load(file)
                except (FileNotFoundError, json.JSONDecodeError):
                    # If the file doesn't exist or is empty, start with an empty list
                    existing_data = []

                # Append the new data
                existing_data.append(data_to_save)

                # Write the updated data back to the file
                with open(file_path, 'w') as file:
                    json.dump(existing_data, file, indent=4)

                # Base address that each source name is appended to when
                # building the markdown links
                url = 'https://cases.stretto.com/public/x247/12208/PLEADINGS/'

                # Build one markdown section per retrieved paragraph. The
                # parallel lists are walked in lockstep and the sections are
                # joined once instead of growing a string inside the loop.
                sections = []
                for item, source, title, date, chunk, similarity in zip(
                        selected_items, selected_sources, titles, dates,
                        selected_chunks, highest_simularities):
                    source_line = (
                        f" [{title}]({url}{source})"
                        f"  text block: {chunk}"
                        f"   Relevance: {similarity:.2f}"
                        f"  Date:{date}"
                    )
                    sections.append(
                        "### Paragraph used. \n" + escape_markdown(item)
                        + "\n\n  source:" + source_line + "\n"
                    )

                content_placeholder.markdown("".join(sections))

            else:
                # 'ln' is not set anywhere in this file, so read both status
                # values with a default; a missing key must not turn the
                # waiting notice into an AttributeError message.
                ln_status = st.session_state.get("ln", "")
                em_status = st.session_state.get("em", "")
                answer_placeholder.markdown(
                    f"Waiting for system to wake up   {ln_status}    {em_status}"
                )
        except Exception as e:
            # Top-level boundary of the script: show the failure to the user
            # instead of crashing the page. Convert explicitly, since
            # markdown expects text rather than an exception object.
            answer_placeholder.markdown(str(e))

    else:
        answer_placeholder.warning("Please type a question.")



    
#if 'retriever' not in st.session_state:
#    st.session_state.em = "mm"

#if 'retriever' not in st.session_state:
#    st.session_state.em = "1"
#    exec(open('start.py').read())
#    st.session_state.em = "2"