Writo committed on
Commit
474a5bc
·
1 Parent(s): 3b3afbd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import streamlit as st
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.chains.question_answering import load_qa_chain
8
+ from langchain.llms import OpenAI
9
+
10
+
11
+
12
def main():
    """Run the "Chat PDF" Streamlit app.

    Flow:
      1. Call ``load_dotenv()`` and configure the Streamlit page.
      2. Let the user upload a PDF and concatenate the text of all its pages.
      3. Split the text into newline-separated chunks (1000 characters,
         200 overlap) and index them in a FAISS store built with
         ``OpenAIEmbeddings``.
      4. For a typed question, run a similarity search against the store and
         pass the matching chunks to a "stuff" question-answering chain,
         then write the response to the page.

    Returns:
        None. All output is rendered through Streamlit.
    """
    load_dotenv()
    st.set_page_config(page_title="Chat PDF")
    st.header("Chat PDF 💬")

    # upload file
    pdf = st.file_uploader("Upload your PDF file", type="pdf")
    if pdf is None:
        # Nothing uploaded yet; there is nothing to index or query.
        return

    # extract the text
    pdf_reader = PdfReader(pdf)
    # `or ""` guards against a page yielding None/empty (e.g. image-only
    # pages), which would otherwise break string concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # split into chunks
    char_text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    text_chunks = char_text_splitter.split_text(text)
    if not text_chunks:
        # NOTE(review): an empty chunk list is not expected to be indexable by
        # FAISS.from_texts; tell the user instead of surfacing a traceback.
        st.warning("No extractable text was found in this PDF.")
        return

    # create embeddings
    embeddings = OpenAIEmbeddings()
    docsearch = FAISS.from_texts(text_chunks, embeddings)
    llm = OpenAI()
    chain = load_qa_chain(llm, chain_type="stuff")

    # show user input
    query = st.text_input("Type your question:")
    if query:
        docs = docsearch.similarity_search(query)
        response = chain.run(input_documents=docs, question=query)
        st.write(response)
45
+
46
+
47
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()