Writo committed on
Commit
8dfb1ea
·
1 Parent(s): ab85145

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import streamlit as st
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.chains.question_answering import load_qa_chain
8
+ from langchain.llms import OpenAI
9
+ import time
10
+ import logging
11
+ import os
12
+
13
# Setup logging.
# Records at INFO level and above are appended to ``app_log.log`` (a relative
# path, so it is resolved against the process's current working directory).
# Each record is written as "<timestamp> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='app_log.log',  # Log file name
    filemode='a',  # Append mode: keep earlier log entries
)
18
+
19
def process_pdf(pdf):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf: Any object ``PdfReader`` accepts as its source.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages. A page whose ``extract_text()`` result is
        falsy (``None`` or ``""``) contributes an empty string.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    pdf_reader = PdfReader(pdf)
    # join() builds the result in one pass instead of repeated `+=`.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    elapsed = time.perf_counter() - start_time
    logging.info("Processed PDF in %s seconds", elapsed)
    return text
28
+
29
def _build_docsearch(pdfs):
    """Extract, chunk and embed the uploaded PDFs; return a FAISS index, or None if there is no text."""
    start_time = time.perf_counter()
    text = "".join(process_pdf(pdf) for pdf in pdfs)
    # Whitespace-only text would yield no chunks and make FAISS.from_texts fail.
    if not text.strip():
        return None

    processing_time = time.perf_counter() - start_time
    logging.info("Total PDF processing time: %s seconds", processing_time)

    char_text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000,
                                               chunk_overlap=200, length_function=len)
    text_chunks = char_text_splitter.split_text(text)
    if not text_chunks:
        return None

    return FAISS.from_texts(text_chunks, OpenAIEmbeddings())


def _format_history(chat_history):
    """Render the chat history entries as the plain text shown in the history box."""
    return "".join(
        f"Q: {chat['question']}\nA: {chat['answer']}\n{chat['time']}\n---\n"
        for chat in chat_history
    )


def main():
    """Run the "Chat PDF" Streamlit page.

    Flow: load environment variables from ``.env``, let the user upload one or
    more PDFs, build a FAISS similarity index over their text, then answer
    questions typed by the user with a "stuff" question-answering chain and
    keep a running chat history in ``st.session_state.chat_history``.

    Any exception raised while indexing or answering is logged (with
    traceback) and shown to the user via ``st.error``.
    """
    load_dotenv()
    st.set_page_config(page_title="Chat PDF")
    st.header("Chat PDF 💬")

    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []

    pdfs = st.file_uploader("Upload your PDF files", type="pdf", accept_multiple_files=True)
    if not pdfs:
        return

    try:
        # Streamlit re-executes this script on every interaction. Rebuild the
        # (slow, billable) embedding index only when the set of uploaded
        # files actually changes; otherwise reuse the one kept in the session.
        upload_key = tuple((pdf.name, pdf.size) for pdf in pdfs)
        if st.session_state.get("docsearch_key") != upload_key:
            docsearch = _build_docsearch(pdfs)
            if docsearch is None:
                st.write("No text could be extracted from the PDFs.")
                return
            st.session_state.docsearch = docsearch
            st.session_state.docsearch_key = upload_key
        docsearch = st.session_state.docsearch

        # A form with clear_on_submit empties the input after each question
        # and only reports a query on the run in which it was submitted, so a
        # rerun for an unrelated reason cannot re-ask the previous question.
        with st.form("question_form", clear_on_submit=True):
            query = st.text_input("Type your question:")
            submitted = st.form_submit_button("Ask")

        if submitted and query:
            chain = load_qa_chain(OpenAI(), chain_type="stuff")

            qa_start_time = time.perf_counter()
            docs = docsearch.similarity_search(query)
            response = chain.run(input_documents=docs, question=query)
            qa_elapsed = time.perf_counter() - qa_start_time

            # Update chat history
            processing_info = f"Processing Time: {qa_elapsed:.2f} seconds"
            st.session_state.chat_history.append(
                {"question": query, "answer": response, "time": processing_info}
            )

        # Display chat history in a text area
        if st.session_state.chat_history:
            st.text_area("Chat History", _format_history(st.session_state.chat_history), height=300)

    except Exception as e:
        # Top-level UI boundary: record the traceback and surface the message
        # to the user instead of crashing the page.
        logging.exception("An error occurred: %s", e)
        st.error(f"An error occurred: {e}")
86
+
87
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()