Benjy committed on
Commit
a70f40e
·
verified ·
1 Parent(s): cb031cf

Create chatpdf.py

Browse files
Files changed (1) hide show
  1. chatpdf.py +53 -0
chatpdf.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import CharacterTextSplitter
4
+ from langchain.embeddings import OpenAIEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.chains.question_answering import load_qa_chain
7
+ from langchain.llms import OpenAI
8
+ from langchain.callbacks import get_openai_callback
9
+ from dotenv import load_dotenv
10
+
11
+ # Load environment variables
12
+ load_dotenv()
13
+
14
def _read_pdf_text(pdf_files):
    """Return the text of every page of every uploaded PDF, joined by newlines.

    Args:
        pdf_files: Iterable of file-like objects accepted by ``PdfReader``
            (here, the uploads returned by ``st.file_uploader``).

    Returns:
        One string with each page's text separated by a newline; an empty
        string when nothing could be extracted.
    """
    page_texts = []
    for pdf_file in pdf_files:
        for page in PdfReader(pdf_file).pages:
            # A page may yield no text (e.g. scanned images); treat it as
            # empty instead of failing on string concatenation.
            page_texts.append(page.extract_text() or "")
    # Newline-join so the last word of one page is not glued to the first
    # word of the next; "\n" is also the splitter's separator below.
    return "\n".join(page_texts)


def main():
    """Render the "PDF Chat" Streamlit page.

    Lets the user upload one or more PDF files, indexes the text of *all*
    uploads in a single FAISS vector store, and answers a free-text question
    with an OpenAI-backed question-answering chain. OpenAI usage reported by
    the callback is printed to stdout.
    """
    st.set_page_config(page_title="PDF Chat")
    st.header("Chat with your PDFs 💬")

    # Upload PDF files
    pdf_files = st.file_uploader("Upload your PDF files", type="pdf", accept_multiple_files=True)
    if not pdf_files:
        return

    # Gather text across every upload first, so that a single index and a
    # single question box serve all files (a per-file index/widget would
    # either search only the last file or create duplicate widgets).
    text = _read_pdf_text(pdf_files)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # Building a vector store from zero texts fails; tell the user why
        # there is nothing to query instead of crashing.
        st.warning("No extractable text was found in the uploaded PDF files.")
        return

    embeddings = OpenAIEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    user_question = st.text_input("Ask a question about your PDF:")
    if not user_question:
        return

    docs = knowledge_base.similarity_search(user_question)

    llm = OpenAI()
    chain = load_qa_chain(llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=user_question)
        print(cb)

    st.write(response)
51
+
52
# Launch the app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()