# NOTE: this file was exported from a Hugging Face Spaces file viewer;
# the page header, git revision hashes, and line-number gutter that the
# export prepended have been removed so the module parses cleanly.
from dotenv import load_dotenv
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
import time
import logging
import os
# Setup logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
filename='app_log.log', # Log file name
filemode='a') # Append mode
def process_pdf(pdf):
    """Extract all text from a single uploaded PDF.

    Args:
        pdf: A file-like object (e.g. a Streamlit ``UploadedFile``)
            readable by PyPDF2's ``PdfReader``.

    Returns:
        The concatenated text of every page. Pages with no extractable
        text (e.g. scanned images) contribute an empty string.
    """
    start_time = time.time()
    pdf_reader = PdfReader(pdf)
    # "".join avoids quadratic += concatenation; extract_text() may
    # return None for image-only pages, hence the `or ""` guard.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    elapsed = time.time() - start_time
    # Lazy %-style args keep formatting cost out of the logging call.
    logging.info("Processed PDF in %s seconds", elapsed)
    return text
def main():
    """Run the Streamlit Chat-PDF app.

    Lets the user upload one or more PDFs, builds a FAISS vector index
    over their extracted text, and answers typed questions with an
    OpenAI QA chain, keeping a running Q/A history in session state.
    """
    load_dotenv()  # load OPENAI_API_KEY etc. from a local .env file
    st.set_page_config(page_title="Chat PDF")
    st.header("Chat PDF 💬")
    # Persist Q/A history across Streamlit reruns.
    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []
    pdfs = st.file_uploader("Upload your PDF files", type="pdf", accept_multiple_files=True)
    if pdfs:
        try:
            start_time = time.time()
            text = "".join(process_pdf(pdf) for pdf in pdfs)
            if not text:
                st.write("No text could be extracted from the PDFs.")
                return
            processing_time = time.time() - start_time
            logging.info("Total PDF processing time: %s seconds", processing_time)
            # Chunk the text so each piece fits the model's context window.
            char_text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000,
                                                       chunk_overlap=200, length_function=len)
            text_chunks = char_text_splitter.split_text(text)
            # NOTE(review): the embeddings and FAISS index are rebuilt on
            # every Streamlit rerun; consider caching in st.session_state
            # for large PDFs.
            embeddings = OpenAIEmbeddings()
            docsearch = FAISS.from_texts(text_chunks, embeddings)
            llm = OpenAI()
            chain = load_qa_chain(llm, chain_type="stuff")
            query = st.text_input("Type your question:")
            if query:
                qa_start_time = time.time()
                docs = docsearch.similarity_search(query)
                response = chain.run(input_documents=docs, question=query)
                qa_end_time = time.time()
                processing_info = f"Processing Time: {qa_end_time - qa_start_time:.2f} seconds"
                st.session_state.chat_history.append({"question": query, "answer": response, "time": processing_info})
                # Removed the ineffective `st.session_state.query = ""`:
                # the text_input above has no key="query", so that
                # assignment never cleared the widget.
            # Render the accumulated chat history in one read-only area.
            history_text = "".join(
                f"Q: {chat['question']}\nA: {chat['answer']}\n{chat['time']}\n---\n"
                for chat in st.session_state.chat_history
            )
            st.text_area("Chat History", history_text, height=300)
        except Exception as e:
            # Top-level boundary: record the traceback and surface the
            # error in the UI instead of crashing the app.
            logging.exception("An error occurred: %s", e)
            st.error(f"An error occurred: {e}")
# Script entry point: launch the Streamlit app only when this module is
# executed directly (not when imported).
if __name__ == "__main__":
    main()