File size: 3,176 Bytes
474a5bc
 
 
 
 
 
 
 
cbf9460
 
8be95ac
cbf9460
 
072b777
 
 
 
 
cbf9460
 
 
 
 
 
 
 
 
 
474a5bc
bb898c3
474a5bc
 
bb898c3
2301c47
 
 
0d11e29
bb898c3
0d11e29
bb898c3
291c121
5c015e2
291c121
8be95ac
291c121
 
 
 
 
 
 
 
 
bb898c3
291c121
bb898c3
291c121
 
 
8be95ac
bb898c3
291c121
77efc46
291c121
8be95ac
291c121
 
8be95ac
2301c47
291c121
8be95ac
 
 
291c121
 
8be95ac
e4d76d3
 
291c121
e4d76d3
 
291c121
 
 
 
474a5bc
0d11e29
bb898c3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
import time
import logging
import os

# Configure the root logger: records at INFO and above are appended to
# app_log.log, each prefixed with a timestamp and its level name.
logging.basicConfig(
    filename='app_log.log',
    filemode='a',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

def process_pdf(pdf):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf: The PDF source; it is handed unchanged to ``PdfReader``.

    Returns:
        The page texts concatenated in page order. A page whose
        ``extract_text()`` result is falsy contributes an empty string.
    """
    # perf_counter() is monotonic, so the logged duration cannot be skewed
    # by a wall-clock adjustment happening during extraction.
    start_time = time.perf_counter()
    pdf_reader = PdfReader(pdf)
    # join() assembles the result in one pass instead of re-growing a
    # string once per page.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    logging.info(f"Processed PDF in {time.perf_counter() - start_time} seconds")
    return text

def _uploads_fingerprint(pdfs):
    """Return a hex digest identifying the exact contents of the uploads."""
    import hashlib  # local import keeps this block self-contained

    digest = hashlib.sha256()
    for pdf in pdfs:
        data = pdf.getvalue()
        # Prefix each file with its length so that file boundaries are part
        # of the hash (two files "ab"+"c" differ from "a"+"bc").
        digest.update(str(len(data)).encode("ascii") + b":")
        digest.update(data)
    return digest.hexdigest()


def _build_docsearch(pdfs):
    """Extract, chunk and embed the uploaded PDFs.

    Returns:
        A FAISS vector store over the text chunks, or ``None`` when no
        usable text could be extracted from any of the files.
    """
    start_time = time.perf_counter()
    # Separate files with a newline so the last line of one PDF does not
    # fuse with the first line of the next before splitting on "\n".
    text = "\n".join(process_pdf(pdf) for pdf in pdfs)
    processing_time = time.perf_counter() - start_time
    logging.info(f"Total PDF processing time: {processing_time} seconds")

    if not text.strip():
        return None

    char_text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000,
                                               chunk_overlap=200, length_function=len)
    text_chunks = char_text_splitter.split_text(text)
    if not text_chunks:
        return None

    return FAISS.from_texts(text_chunks, OpenAIEmbeddings())


def _submit_query():
    """Widget callback: queue the typed question and clear the input box.

    Widget state may only be modified from a callback (before the widget is
    instantiated in the next run), which is why the clearing happens here.
    """
    st.session_state.pending_query = st.session_state.query.strip()
    st.session_state.query = ""


def _render_chat_history():
    """Show all recorded question/answer pairs in a read-only style text area."""
    if not st.session_state.chat_history:
        return
    history_text = "".join(
        f"Q: {chat['question']}\nA: {chat['answer']}\n{chat['time']}\n---\n"
        for chat in st.session_state.chat_history
    )
    st.text_area("Chat History", history_text, height=300)


def main():
    """Run the Chat PDF page: upload PDFs, ask questions, show the history.

    Streamlit re-executes this function on every user interaction, so the
    expensive vector index is cached in ``st.session_state`` and rebuilt only
    when the uploaded file contents change, and each submitted question is
    consumed exactly once so reruns do not re-ask it or duplicate history.
    """
    load_dotenv()
    st.set_page_config(page_title="Chat PDF")
    st.header("Chat PDF 💬")

    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []

    pdfs = st.file_uploader("Upload your PDF files", type="pdf", accept_multiple_files=True)

    if pdfs:
        try:
            # Rebuild the index only when the set of uploaded bytes changed.
            fingerprint = _uploads_fingerprint(pdfs)
            if ('docsearch_key' not in st.session_state
                    or st.session_state.docsearch_key != fingerprint):
                st.session_state.docsearch = _build_docsearch(pdfs)
                st.session_state.docsearch_key = fingerprint
            docsearch = st.session_state.docsearch

            if docsearch is None:
                st.write("No text could be extracted from the PDFs.")
            else:
                # The keyed widget plus callback is what actually clears the
                # box after submission.
                st.text_input("Type your question:", key="query", on_change=_submit_query)

                # Take the queued question (if any) so it is answered once.
                query = ""
                if 'pending_query' in st.session_state:
                    query = st.session_state.pending_query
                    del st.session_state.pending_query

                if query:
                    chain = load_qa_chain(OpenAI(), chain_type="stuff")

                    qa_start_time = time.perf_counter()
                    docs = docsearch.similarity_search(query)
                    response = chain.run(input_documents=docs, question=query)
                    qa_end_time = time.perf_counter()

                    # Update chat history
                    processing_info = f"Processing Time: {qa_end_time - qa_start_time:.2f} seconds"
                    st.session_state.chat_history.append(
                        {"question": query, "answer": response, "time": processing_info}
                    )

        except Exception as e:
            # Top-level UI boundary: record the traceback and surface the
            # message to the user instead of crashing the page.
            logging.exception("An error occurred: %s", e)
            st.error(f"An error occurred: {e}")

    _render_chat_history()

# Invoke main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()