Spaces:

tlarsson
/

psdocuments

Sleeping

App Files Files Community

Tomas Larsson committed on Mar 18, 2024

Commit

1acb91b

1 Parent(s): fdf8a49

V0

Browse files

Files changed (5) hide show

app.py +21 -9
embeddings.npy +2 -2
start2.py +42 -6
vectorstore.pkl +2 -2
vectorstore2.pkl +0 -3

app.py CHANGED Viewed

@@ -103,7 +103,6 @@ submit_button = st.button('Submit')
 Answer_tab, Content_tab, Info_tab = st.tabs(["Answer", "Content used to create answer", "Infrmation about this app"])
 # Placeholder for displaying the answer
 with Answer_tab:
     answer_placeholder = st.empty()
@@ -119,13 +118,26 @@ as it takes more work to retrieve the text from them. It does include most order
 This is a simple RAG (retrieval augmented generation) system and does not consider order of events when
 retrieving onformation and generating responses. It can also easily missinterpret information, but information used to generate the
 response is presented in the content tab with link to the full document so that you can read the details in its proper context.
-"""
-                )
 # Logic to display an answer when the submit button is pressed
 if submit_button:
     if question:  # Check if there is a question typed
@@ -134,8 +146,8 @@ if submit_button:
             if started:
                 #Awnser = rag_chain.invoke(question)
                 #contexts = retriever.get_relevant_documents(question)
-                answer, selected_items,selected_sources,selected_chunks,highest_simularities = ask(question)
-                answer_placeholder.markdown(answer)  # Display the answer
                     # Prepare the data to be saved
@@ -178,10 +190,10 @@ if submit_button:
                 string = ""
                 for k in range(len(selected_items)):
-                    temp = " [" +  selected_sources[k] + "](" + url  +  selected_sources[k] +   ")" +  "  text block: " + selected_chunks[k]   + "   Relevance: " +f"{highest_simularities[k]:.2f}"
-                    string = string + "### Paragraph used. \n" + selected_items[k]  + "\n\n  source:"  + temp  + "\n"
                 content_placeholder.markdown(string)

 Answer_tab, Content_tab, Info_tab = st.tabs(["Answer", "Content used to create answer", "Infrmation about this app"])
 # Placeholder for displaying the answer
 with Answer_tab:
     answer_placeholder = st.empty()
 This is a simple RAG (retrieval augmented generation) system and does not consider order of events when
 retrieving onformation and generating responses. It can also easily missinterpret information, but information used to generate the
 response is presented in the content tab with link to the full document so that you can read the details in its proper context.
+""" )
+    with open('results.json', 'r') as file:
+        content = file.read()
+    data_to_download = content.encode()
+    # Create a download button
+    st.download_button(label="Download Prior responses",
+                   data=data_to_download,
+                   file_name="results.json",
+                   mime="json")
 # Logic to display an answer when the submit button is pressed
 if submit_button:
     if question:  # Check if there is a question typed
             if started:
                 #Awnser = rag_chain.invoke(question)
                 #contexts = retriever.get_relevant_documents(question)
+                answer, selected_items,selected_sources,titles,selected_chunks,highest_simularities = ask(question)
+                answer_placeholder.markdown(escape_markdown(answer))  # Display the answer
                     # Prepare the data to be saved
                 string = ""
                 for k in range(len(selected_items)):
+                    temp = " [" +  titles[k] + "](" + url  +  selected_sources[k] +   ")" +  "  text block: " + selected_chunks[k]   + "   Relevance: " +f"{highest_simularities[k]:.2f}"
+                    string = string + "### Paragraph used. \n" + escape_markdown(selected_items[k])  + "\n\n  source:"  + temp  + "\n"
                 content_placeholder.markdown(string)

embeddings.npy CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e59b70d53bc2779e24f76bcc2377fd60b9d3cdabf20b26cd8cfc176ec316292
-size 66072704

 version https://git-lfs.github.com/spec/v1
+oid sha256:578f2b44347eaff3137c23cbeb70ece93360e78c3d447ca9e4d825296a8bd0b3
+size 43131008

start2.py CHANGED Viewed

@@ -8,6 +8,39 @@ from langchain.schema.runnable import RunnablePassthrough
 from langchain.schema.output_parser import StrOutputParser
 from scipy.spatial.distance import cosine
 if not started:
     print("------------starting------------")
@@ -18,7 +51,7 @@ if not started:
     # Path to the pickle file where you want to save your data
-    pickle_file_path = 'vectorstore2.pkl'
     with open(pickle_file_path, 'rb') as file:
@@ -106,13 +139,15 @@ def askq(query):
     # Combine content and question into a single prompt
-    prompt = f"""Anwser question provided given content. If an awnser can't be found in the provided ontent respond that you could not find
     the awnser to the question, applogice and say that you will instead tell a lawyer joke follow that with the joke.
     Content: {content}\n\nQuestion: {question}\nAnswer:"""
     # Define LLM
-    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)
     Awnser = llm.invoke(prompt).content
     return Awnser, selected_items,selected_sources,selected_chunks,highest_simularities
@@ -152,13 +187,14 @@ def ask(query):
     selected_items = [docs[i].page_content for i in highest_indices]
     selected_sources = [docs[i].metadata['source'] for i in highest_indices]
     selected_chunks = [docs[i].metadata['chunk'] for i in highest_indices]
     # Combine selected items into a single content string
     content = ' '.join(selected_items)
     # Prepare the prompt
-    prompt = f"""Answer the question provided given the content. If an answer can't be found in the provided content,
-    respond that you could not find the answer to the question, apologize and instead provide a suggestion for where to search for more information related to teh question.
     -------------------
     Content: {content}\n\nQuestion: {question}\nAnswer:
@@ -170,4 +206,4 @@ def ask(query):
     llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)
     answer = llm.invoke(prompt).content
-    return answer, selected_items, selected_sources, selected_chunks, highest_simularities

 from langchain.schema.output_parser import StrOutputParser
 from scipy.spatial.distance import cosine
def find_first_with_docket(items):
    """Return the first string in *items* that mentions "docket".

    The match is a case-insensitive substring test, so "Docket", "DOCKET"
    and "docketed" all count.

    Args:
        items: Iterable of strings, scanned in order.

    Returns:
        The first matching item, unchanged, or the integer ``0`` when no
        item matches (including when *items* is empty).
    """
    # NOTE(review): the no-match sentinel is 0, not None. It is kept as-is
    # because callers outside this view may compare against it; confirm
    # before switching to None.
    return next((item for item in items if "docket" in item.lower()), 0)
def escape_markdownold(text):
    """Backslash-escape Markdown special characters in *text* using a regex.

    Escaped characters: backslash, ``*``, ``_``, ``#``, ``{``, ``}``, ``[``,
    ``]``, ``(``, ``)``, ``+``, ``-``, ``.``, ``!`` and ``|``.

    Args:
        text: The string to escape.

    Returns:
        A copy of *text* with a backslash inserted before every listed
        character. Characters not listed (for example the backtick) are
        left untouched.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import re`` being present.
    import re

    # One explicit character class. ``|`` is listed on purpose: the earlier
    # pattern wrapped a "|"-separated alternation in [...], which made ``|``
    # a literal member of the class, so it has always been escaped.
    return re.sub(r"([\\*_#{}\[\]()+\-.!|])", r"\\\1", text)
# Translation table mapping each Markdown-significant character to its
# backslash-escaped form. Built once at import time.
_MARKDOWN_ESCAPES = str.maketrans(
    {char: "\\" + char for char in "\\`*_{}[]()#+-.!|>$"}
)


def escape_markdown(text):
    """Backslash-escape Markdown special characters in *text*.

    Escaped characters: backslash, backtick, ``*``, ``_``, ``{``, ``}``,
    ``[``, ``]``, ``(``, ``)``, ``#``, ``+``, ``-``, ``.``, ``!``, ``|``,
    ``>`` and ``$``.

    Args:
        text: The string to escape.

    Returns:
        A copy of *text* with a backslash inserted before every listed
        character.
    """
    # A single translate pass looks at each input character exactly once,
    # so inserted backslashes can never be escaped a second time and the
    # result does not depend on the order of the character list.
    return text.translate(_MARKDOWN_ESCAPES)
 if not started:
     print("------------starting------------")
     # Path to the pickle file where you want to save your data
+    pickle_file_path = 'vectorstore.pkl'
     with open(pickle_file_path, 'rb') as file:
     # Combine content and question into a single prompt
+    prompt = f"""Anwser the question or request provided given content. If an awnser can't be found in the provided ontent respond that you could not find
     the awnser to the question, applogice and say that you will instead tell a lawyer joke follow that with the joke.
     Content: {content}\n\nQuestion: {question}\nAnswer:"""
     # Define LLM
+    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2)
+    #llm = ChatOpenAI(model_name="gpt-4", temperature=0.2)
     Awnser = llm.invoke(prompt).content
     return Awnser, selected_items,selected_sources,selected_chunks,highest_simularities
     selected_items = [docs[i].page_content for i in highest_indices]
     selected_sources = [docs[i].metadata['source'] for i in highest_indices]
     selected_chunks = [docs[i].metadata['chunk'] for i in highest_indices]
+    titles = [docs[i].metadata['title'] for i in highest_indices]
     # Combine selected items into a single content string
     content = ' '.join(selected_items)
     # Prepare the prompt
+    prompt = f"""Answer the question or request provided given the content. If an answer can't be found in the provided content,
+    respond that you could not find the answer to the question, apologize and instead provide a suggestion for where to search for more information related to the question.
     -------------------
     Content: {content}\n\nQuestion: {question}\nAnswer:
     llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)
     answer = llm.invoke(prompt).content
+    return answer, selected_items, selected_sources, titles, selected_chunks, highest_simularities

vectorstore.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:507fc427f4ebce75592e4035cb1b4f8601f46d82c3170f5529e21c5844fc8440
-size 5553356

 version https://git-lfs.github.com/spec/v1
+oid sha256:54f335b6791bcf72f95e705e8418e6ba343585df5b6bb1bedf7e738f9a2b698f
+size 5449252

vectorstore2.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0eeb601bfdd128945a52712a20a89f9bfd89c85ea1d25215d552f68ca094b012
-size 5582531