Tomas Larsson committed on
Commit
1acb91b
·
1 Parent(s): fdf8a49
Files changed (5) hide show
  1. app.py +21 -9
  2. embeddings.npy +2 -2
  3. start2.py +42 -6
  4. vectorstore.pkl +2 -2
  5. vectorstore2.pkl +0 -3
app.py CHANGED
@@ -103,7 +103,6 @@ submit_button = st.button('Submit')
103
  Answer_tab, Content_tab, Info_tab = st.tabs(["Answer", "Content used to create answer", "Infrmation about this app"])
104
 
105
 
106
-
107
  # Placeholder for displaying the answer
108
  with Answer_tab:
109
  answer_placeholder = st.empty()
@@ -119,13 +118,26 @@ as it takes more work to retrieve the text from them. It does include most order
119
  This is a simple RAG (retrieval augmented generation) system and does not consider order of events when
120
  retrieving onformation and generating responses. It can also easily missinterpret information, but information used to generate the
121
  response is presented in the content tab with link to the full document so that you can read the details in its proper context.
122
-
123
 
124
 
125
- """
126
- )
127
 
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  # Logic to display an answer when the submit button is pressed
130
  if submit_button:
131
  if question: # Check if there is a question typed
@@ -134,8 +146,8 @@ if submit_button:
134
  if started:
135
  #Awnser = rag_chain.invoke(question)
136
  #contexts = retriever.get_relevant_documents(question)
137
- answer, selected_items,selected_sources,selected_chunks,highest_simularities = ask(question)
138
- answer_placeholder.markdown(answer) # Display the answer
139
 
140
  # Prepare the data to be saved
141
 
@@ -178,10 +190,10 @@ if submit_button:
178
 
179
  string = ""
180
  for k in range(len(selected_items)):
181
- temp = " [" + selected_sources[k] + "](" + url + selected_sources[k] + ")" + " text block: " + selected_chunks[k] + " Relevance: " +f"{highest_simularities[k]:.2f}"
182
-
183
 
184
- string = string + "### Paragraph used. \n" + selected_items[k] + "\n\n source:" + temp + "\n"
185
 
186
 
187
  content_placeholder.markdown(string)
 
103
  Answer_tab, Content_tab, Info_tab = st.tabs(["Answer", "Content used to create answer", "Infrmation about this app"])
104
 
105
 
 
106
  # Placeholder for displaying the answer
107
  with Answer_tab:
108
  answer_placeholder = st.empty()
 
118
  This is a simple RAG (retrieval augmented generation) system and does not consider order of events when
119
  retrieving onformation and generating responses. It can also easily missinterpret information, but information used to generate the
120
  response is presented in the content tab with link to the full document so that you can read the details in its proper context.
 
121
 
122
 
123
+ """ )
 
124
 
125
 
126
+
127
+ with open('results.json', 'r') as file:
128
+ content = file.read()
129
+
130
+
131
+ data_to_download = content.encode()
132
+
133
+ # Create a download button
134
+ st.download_button(label="Download Prior responses",
135
+ data=data_to_download,
136
+ file_name="results.json",
137
+ mime="json")
138
+
139
+
140
+
141
  # Logic to display an answer when the submit button is pressed
142
  if submit_button:
143
  if question: # Check if there is a question typed
 
146
  if started:
147
  #Awnser = rag_chain.invoke(question)
148
  #contexts = retriever.get_relevant_documents(question)
149
+ answer, selected_items,selected_sources,titles,selected_chunks,highest_simularities = ask(question)
150
+ answer_placeholder.markdown(escape_markdown(answer)) # Display the answer
151
 
152
  # Prepare the data to be saved
153
 
 
190
 
191
  string = ""
192
  for k in range(len(selected_items)):
193
+ temp = " [" + titles[k] + "](" + url + selected_sources[k] + ")" + " text block: " + selected_chunks[k] + " Relevance: " +f"{highest_simularities[k]:.2f}"
194
+
195
 
196
+ string = string + "### Paragraph used. \n" + escape_markdown(selected_items[k]) + "\n\n source:" + temp + "\n"
197
 
198
 
199
  content_placeholder.markdown(string)
embeddings.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e59b70d53bc2779e24f76bcc2377fd60b9d3cdabf20b26cd8cfc176ec316292
3
- size 66072704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:578f2b44347eaff3137c23cbeb70ece93360e78c3d447ca9e4d825296a8bd0b3
3
+ size 43131008
start2.py CHANGED
@@ -8,6 +8,39 @@ from langchain.schema.runnable import RunnablePassthrough
8
  from langchain.schema.output_parser import StrOutputParser
9
  from scipy.spatial.distance import cosine
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  if not started:
12
  print("------------starting------------")
13
 
@@ -18,7 +51,7 @@ if not started:
18
 
19
 
20
  # Path to the pickle file where you want to save your data
21
- pickle_file_path = 'vectorstore2.pkl'
22
 
23
 
24
  with open(pickle_file_path, 'rb') as file:
@@ -106,13 +139,15 @@ def askq(query):
106
 
107
 
108
  # Combine content and question into a single prompt
109
- prompt = f"""Anwser question provided given content. If an awnser can't be found in the provided ontent respond that you could not find
110
  the awnser to the question, applogice and say that you will instead tell a lawyer joke follow that with the joke.
111
  Content: {content}\n\nQuestion: {question}\nAnswer:"""
112
 
113
 
114
  # Define LLM
115
- llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)
 
 
116
  Awnser = llm.invoke(prompt).content
117
 
118
  return Awnser, selected_items,selected_sources,selected_chunks,highest_simularities
@@ -152,13 +187,14 @@ def ask(query):
152
  selected_items = [docs[i].page_content for i in highest_indices]
153
  selected_sources = [docs[i].metadata['source'] for i in highest_indices]
154
  selected_chunks = [docs[i].metadata['chunk'] for i in highest_indices]
 
155
 
156
  # Combine selected items into a single content string
157
  content = ' '.join(selected_items)
158
 
159
  # Prepare the prompt
160
- prompt = f"""Answer the question provided given the content. If an answer can't be found in the provided content,
161
- respond that you could not find the answer to the question, apologize and instead provide a suggestion for where to search for more information related to teh question.
162
 
163
  -------------------
164
  Content: {content}\n\nQuestion: {question}\nAnswer:
@@ -170,4 +206,4 @@ def ask(query):
170
  llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)
171
  answer = llm.invoke(prompt).content
172
 
173
- return answer, selected_items, selected_sources, selected_chunks, highest_simularities
 
8
  from langchain.schema.output_parser import StrOutputParser
9
  from scipy.spatial.distance import cosine
10
 
11
+
12
def find_first_with_docket(items):
    """Return the first item whose text contains "docket" (case-insensitive).

    Parameters:
        items: iterable of strings to scan in order.

    Returns:
        The first matching item, or 0 when no item matches.

    NOTE(review): returns 0 (not None) on no match — kept as-is for backward
    compatibility, since callers may rely on this exact falsy sentinel.
    (The original also kept an index counter `k` that was never used; removed.)
    """
    for item in items:
        # Case-insensitive substring search.
        if "docket" in item.lower():
            return item
    return 0
22
+
23
+
24
+
25
def escape_markdownold(text):
    """Backslash-escape markdown-significant characters in *text* (legacy variant).

    Escapes: * | _ # { } [ ] ( ) + - . ! and backslash itself. Note this set
    differs slightly from escape_markdown (no backtick here, for example).
    """
    # One regex pass: every special character gains a leading backslash.
    # (The original built the class from a '|'-joined string, which made '|'
    # itself part of the escaped set — that behavior is preserved here.)
    pattern = r"([*|_#{}\[\]()+\-.!\\])"
    return re.sub(pattern, r"\\\1", text)
33
+
34
def escape_markdown(text):
    """Return *text* with every markdown special character backslash-escaped."""
    # The full set of characters the original escaped, backslash included.
    specials = set("\\`*_{}[]()#+-.!|>$")
    # A single per-character pass is equivalent to the original's sequential
    # str.replace chain (which handled the backslash first).
    return "".join("\\" + ch if ch in specials else ch for ch in text)
41
+
42
+
43
+
44
  if not started:
45
  print("------------starting------------")
46
 
 
51
 
52
 
53
  # Path to the pickle file where you want to save your data
54
+ pickle_file_path = 'vectorstore.pkl'
55
 
56
 
57
  with open(pickle_file_path, 'rb') as file:
 
139
 
140
 
141
  # Combine content and question into a single prompt
142
+ prompt = f"""Anwser the question or request provided given content. If an awnser can't be found in the provided ontent respond that you could not find
143
  the awnser to the question, applogice and say that you will instead tell a lawyer joke follow that with the joke.
144
  Content: {content}\n\nQuestion: {question}\nAnswer:"""
145
 
146
 
147
  # Define LLM
148
+ llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2)
149
+ #llm = ChatOpenAI(model_name="gpt-4", temperature=0.2)
150
+
151
  Awnser = llm.invoke(prompt).content
152
 
153
  return Awnser, selected_items,selected_sources,selected_chunks,highest_simularities
 
187
  selected_items = [docs[i].page_content for i in highest_indices]
188
  selected_sources = [docs[i].metadata['source'] for i in highest_indices]
189
  selected_chunks = [docs[i].metadata['chunk'] for i in highest_indices]
190
+ titles = [docs[i].metadata['title'] for i in highest_indices]
191
 
192
  # Combine selected items into a single content string
193
  content = ' '.join(selected_items)
194
 
195
  # Prepare the prompt
196
+ prompt = f"""Answer the question or request provided given the content. If an answer can't be found in the provided content,
197
+ respond that you could not find the answer to the question, apologize and instead provide a suggestion for where to search for more information related to the question.
198
 
199
  -------------------
200
  Content: {content}\n\nQuestion: {question}\nAnswer:
 
206
  llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)
207
  answer = llm.invoke(prompt).content
208
 
209
+ return answer, selected_items, selected_sources, titles, selected_chunks, highest_simularities
vectorstore.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:507fc427f4ebce75592e4035cb1b4f8601f46d82c3170f5529e21c5844fc8440
3
- size 5553356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54f335b6791bcf72f95e705e8418e6ba343585df5b6bb1bedf7e738f9a2b698f
3
+ size 5449252
vectorstore2.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0eeb601bfdd128945a52712a20a89f9bfd89c85ea1d25215d552f68ca094b012
3
- size 5582531