"""Streamlit app that answers questions about uploaded PDF files."""

import logging
import os
import time

from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

# Setup logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    filename='app_log.log',  # Log file name
                    filemode='a')  # Append mode


def process_pdf(pdf) -> str:
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf: Whatever ``PdfReader`` accepts; ``main`` passes the objects
            returned by ``st.file_uploader``.

    Returns:
        The page texts concatenated in page order with no separator. Pages
        for which ``extract_text()`` returns a falsy value contribute "".
    """
    start_time = time.perf_counter()
    pdf_reader = PdfReader(pdf)
    # join() instead of repeated += keeps concatenation linear in page count.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    logging.info("Processed PDF in %s seconds",
                 time.perf_counter() - start_time)
    return text


def _build_docsearch(pdfs):
    """Build a FAISS index over the text of *pdfs*; return None if no text."""
    start_time = time.perf_counter()
    text = "".join(process_pdf(pdf) for pdf in pdfs)
    # strip(): whitespace-only output would otherwise reach the embedding
    # step with nothing useful to index.
    if not text.strip():
        return None
    logging.info("Total PDF processing time: %s seconds",
                 time.perf_counter() - start_time)

    char_text_splitter = CharacterTextSplitter(separator="\n",
                                               chunk_size=1000,
                                               chunk_overlap=200,
                                               length_function=len)
    text_chunks = char_text_splitter.split_text(text)
    if not text_chunks:
        return None
    return FAISS.from_texts(text_chunks, OpenAIEmbeddings())


def _get_docsearch(pdfs):
    """Return the index for *pdfs*, rebuilding only when the upload changes.

    Streamlit re-executes the script on every interaction, so without this
    cache each question would re-parse and re-embed every PDF.
    """
    # NOTE(review): name + size is used as the identity of an upload; two
    # different files sharing both would reuse the old index.
    signature = tuple((pdf.name, pdf.size) for pdf in pdfs)
    cached = ('docsearch' in st.session_state
              and 'docsearch_signature' in st.session_state
              and st.session_state.docsearch_signature == signature)
    if cached:
        return st.session_state.docsearch

    docsearch = _build_docsearch(pdfs)
    if docsearch is not None:
        st.session_state.docsearch = docsearch
        st.session_state.docsearch_signature = signature
    return docsearch


def _submit_question() -> None:
    """Hand the typed question over for processing and clear the input box.

    Runs as the text input's ``on_change`` callback, which is where Streamlit
    allows a widget's own session-state value to be reassigned.
    """
    st.session_state.pending_query = st.session_state.query
    st.session_state.query = ""


def main() -> None:
    """Render the page: upload PDFs, ask a question, show the chat history."""
    load_dotenv()
    st.set_page_config(page_title="Chat PDF")
    st.header("Chat PDF 💬")

    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []
    if 'pending_query' not in st.session_state:
        st.session_state.pending_query = ""

    pdfs = st.file_uploader("Upload your PDF files", type="pdf",
                            accept_multiple_files=True)
    if not pdfs:
        return

    try:
        docsearch = _get_docsearch(pdfs)
        if docsearch is None:
            st.write("No text could be extracted from the PDFs.")
            return

        st.text_input("Type your question:", key="query",
                      on_change=_submit_question)
        # Consume the pending question exactly once so that later reruns
        # neither call the LLM again nor append a duplicate history entry.
        query = st.session_state.pending_query
        st.session_state.pending_query = ""

        if query:
            llm = OpenAI()
            chain = load_qa_chain(llm, chain_type="stuff")

            qa_start_time = time.perf_counter()
            docs = docsearch.similarity_search(query)
            response = chain.run(input_documents=docs, question=query)
            qa_end_time = time.perf_counter()

            # Update chat history
            processing_info = f"Processing Time: {qa_end_time - qa_start_time:.2f} seconds"
            st.session_state.chat_history.append(
                {"question": query, "answer": response, "time": processing_info})

        # Display chat history in a text area
        history_text = "".join(
            f"Q: {chat['question']}\nA: {chat['answer']}\n{chat['time']}\n---\n"
            for chat in st.session_state.chat_history
        )
        st.text_area("Chat History", history_text, height=300)
    except Exception as e:
        # Top-level UI boundary: record the traceback and show the message
        # instead of crashing the page.
        logging.exception("An error occurred: %s", e)
        st.error(f"An error occurred: {e}")


if __name__ == "__main__":
    main()