Spaces:
Sleeping
Sleeping
Zwea Htet committed on
Commit · a43a4a7 · 1 Parent(s): 781a2e4
updated code
Browse files
- app.py +47 -25
- pdf/NDA for Student Interns.pdf +0 -0
app.py
CHANGED
|
@@ -3,16 +3,17 @@
|
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
from langchain_community.document_loaders.pdf import PyPDFLoader
|
| 6 |
-
from
|
| 7 |
-
from
|
|
|
|
| 8 |
from langchain.memory import ConversationBufferMemory
|
| 9 |
from langchain_core.prompts import ChatPromptTemplate
|
| 10 |
-
from langchain.chains import ConversationalRetrievalChain,
|
| 11 |
import openai
|
| 12 |
from dotenv import load_dotenv
|
| 13 |
import os
|
| 14 |
|
| 15 |
-
|
| 16 |
|
| 17 |
load_dotenv()
|
| 18 |
|
|
@@ -27,31 +28,47 @@ SAVE_DIR = "pdf"
|
|
| 27 |
|
| 28 |
|
| 29 |
def generate_response(pages, query_text, k, chain_type):
|
| 30 |
-
if pages
|
| 31 |
pinecone.init(
|
| 32 |
api_key=os.getenv("PINECONE_API_KEY"),
|
| 33 |
environment=os.getenv("PINECONE_ENV_NAME"),
|
| 34 |
)
|
| 35 |
|
| 36 |
-
vector_db =
|
| 37 |
-
documents=pages, embedding=OpenAIEmbeddings(), index_name="
|
| 38 |
)
|
| 39 |
|
| 40 |
retriever = vector_db.as_retriever(
|
| 41 |
search_type="similarity", search_kwards={"k": k}
|
| 42 |
)
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
# create a chain to answer questions
|
| 45 |
-
qa =
|
| 46 |
-
llm=
|
| 47 |
chain_type=chain_type,
|
| 48 |
retriever=retriever,
|
| 49 |
-
return_source_documents=True
|
|
|
|
| 50 |
)
|
| 51 |
|
| 52 |
-
response = qa({"
|
| 53 |
return response
|
| 54 |
|
|
|
|
| 55 |
def visual_annotate(document, answer):
|
| 56 |
# Implement this function according to your specific requirements
|
| 57 |
# Highlight the part of the document where the answer was found
|
|
@@ -80,18 +97,19 @@ with st.sidebar.form(key="sidebar-form"):
|
|
| 80 |
)
|
| 81 |
os.environ["PINECONE_API_KEY"] = pinecone_api_key
|
| 82 |
|
| 83 |
-
pinecone_env_name = st.text_input("Enter your Pinecone environment name
|
| 84 |
os.environ["PINECONE_ENV_NAME"] = pinecone_env_name
|
| 85 |
|
| 86 |
-
submitted = st.
|
| 87 |
label="Submit",
|
| 88 |
-
disabled=not (openai_api_key and pinecone_api_key and pinecone_env_name),
|
| 89 |
)
|
| 90 |
|
| 91 |
left_column, right_column = st.columns(2)
|
| 92 |
|
| 93 |
with left_column:
|
| 94 |
uploaded_file = st.file_uploader("Choose a pdf file", type="pdf")
|
|
|
|
| 95 |
|
| 96 |
if uploaded_file is not None:
|
| 97 |
# save the uploaded file to the specified directory
|
|
@@ -101,7 +119,8 @@ with left_column:
|
|
| 101 |
st.success(f"File {uploaded_file.name} is saved at path {file_path}")
|
| 102 |
|
| 103 |
loader = PyPDFLoader(file_path=file_path)
|
| 104 |
-
|
|
|
|
| 105 |
|
| 106 |
query_text = st.text_input(
|
| 107 |
"Enter your question:", placeholder="Please provide a short summary."
|
|
@@ -115,20 +134,23 @@ with left_column:
|
|
| 115 |
|
| 116 |
with st.spinner("Retrieving and generating a response ..."):
|
| 117 |
response = generate_response(
|
| 118 |
-
pages=pages,
|
| 119 |
-
query_text=query_text,
|
| 120 |
-
k=k,
|
| 121 |
-
chain_type=chain_type
|
| 122 |
)
|
| 123 |
|
| 124 |
with right_column:
|
| 125 |
st.write("Output of your question")
|
| 126 |
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
|
| 134 |
# with st.form("myform", clear_on_submit=True):
|
|
|
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
from langchain_community.document_loaders.pdf import PyPDFLoader
|
| 6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 7 |
+
from langchain_community.vectorstores.pinecone import Pinecone
|
| 8 |
+
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
| 9 |
from langchain.memory import ConversationBufferMemory
|
| 10 |
from langchain_core.prompts import ChatPromptTemplate
|
| 11 |
+
from langchain.chains import ConversationalRetrievalChain, RetrievalQAWithSourcesChain
|
| 12 |
import openai
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
import os
|
| 15 |
|
| 16 |
+
import pinecone
|
| 17 |
|
| 18 |
load_dotenv()
|
| 19 |
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
def generate_response(pages, query_text, k, chain_type):
|
| 31 |
+
if pages:
|
| 32 |
pinecone.init(
|
| 33 |
api_key=os.getenv("PINECONE_API_KEY"),
|
| 34 |
environment=os.getenv("PINECONE_ENV_NAME"),
|
| 35 |
)
|
| 36 |
|
| 37 |
+
vector_db = Pinecone.from_documents(
|
| 38 |
+
documents=pages, embedding=OpenAIEmbeddings(), index_name="document-chat"
|
| 39 |
)
|
| 40 |
|
| 41 |
retriever = vector_db.as_retriever(
|
| 42 |
search_type="similarity", search_kwards={"k": k}
|
| 43 |
)
|
| 44 |
+
|
| 45 |
+
prompt_template = ChatPromptTemplate.from_messages(
|
| 46 |
+
[
|
| 47 |
+
(
|
| 48 |
+
"system",
|
| 49 |
+
"You are a helpful assistant that can answer questions regarding to a document provided by the user.",
|
| 50 |
+
),
|
| 51 |
+
("human", "Hello, how are you doing?"),
|
| 52 |
+
("ai", "I'm doing well, thanks!"),
|
| 53 |
+
("human", "{user_input}"),
|
| 54 |
+
]
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
|
| 58 |
+
|
| 59 |
# create a chain to answer questions
|
| 60 |
+
qa = RetrievalQAWithSourcesChain.from_chain_type(
|
| 61 |
+
llm=llm,
|
| 62 |
chain_type=chain_type,
|
| 63 |
retriever=retriever,
|
| 64 |
+
return_source_documents=True,
|
| 65 |
+
# prompt_template=prompt_template,
|
| 66 |
)
|
| 67 |
|
| 68 |
+
response = qa({"question": query_text})
|
| 69 |
return response
|
| 70 |
|
| 71 |
+
|
| 72 |
def visual_annotate(document, answer):
|
| 73 |
# Implement this function according to your specific requirements
|
| 74 |
# Highlight the part of the document where the answer was found
|
|
|
|
| 97 |
)
|
| 98 |
os.environ["PINECONE_API_KEY"] = pinecone_api_key
|
| 99 |
|
| 100 |
+
pinecone_env_name = st.text_input("Enter your Pinecone environment name")
|
| 101 |
os.environ["PINECONE_ENV_NAME"] = pinecone_env_name
|
| 102 |
|
| 103 |
+
submitted = st.form_submit_button(
|
| 104 |
label="Submit",
|
| 105 |
+
# disabled=not (openai_api_key and pinecone_api_key and pinecone_env_name),
|
| 106 |
)
|
| 107 |
|
| 108 |
left_column, right_column = st.columns(2)
|
| 109 |
|
| 110 |
with left_column:
|
| 111 |
uploaded_file = st.file_uploader("Choose a pdf file", type="pdf")
|
| 112 |
+
pages = []
|
| 113 |
|
| 114 |
if uploaded_file is not None:
|
| 115 |
# save the uploaded file to the specified directory
|
|
|
|
| 119 |
st.success(f"File {uploaded_file.name} is saved at path {file_path}")
|
| 120 |
|
| 121 |
loader = PyPDFLoader(file_path=file_path)
|
| 122 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
|
| 123 |
+
pages = loader.load_and_split(text_splitter=text_splitter)
|
| 124 |
|
| 125 |
query_text = st.text_input(
|
| 126 |
"Enter your question:", placeholder="Please provide a short summary."
|
|
|
|
| 134 |
|
| 135 |
with st.spinner("Retrieving and generating a response ..."):
|
| 136 |
response = generate_response(
|
| 137 |
+
pages=pages, query_text=query_text, k=k, chain_type=chain_type
|
|
|
|
|
|
|
|
|
|
| 138 |
)
|
| 139 |
|
| 140 |
with right_column:
|
| 141 |
st.write("Output of your question")
|
| 142 |
|
| 143 |
+
if response:
|
| 144 |
+
st.subheader("Result")
|
| 145 |
+
st.write(response["answer"])
|
| 146 |
+
print("response: ", response)
|
| 147 |
+
|
| 148 |
+
st.subheader("source_documents")
|
| 149 |
+
for each in response["source_documents"]:
|
| 150 |
+
st.write("page: ", each.metadata["page"])
|
| 151 |
+
st.write("source: ", each.metadata["source"])
|
| 152 |
+
else:
|
| 153 |
+
st.write("response not showing at the moment")
|
| 154 |
|
| 155 |
|
| 156 |
# with st.form("myform", clear_on_submit=True):
|
pdf/NDA for Student Interns.pdf
ADDED
|
Binary file (530 kB). View file
|
|
|