dimoZ committed on
Commit
bd127a4
·
verified ·
1 Parent(s): 14c5aab

Upload 3 files

Browse files
Files changed (3) hide show
  1. .env +1 -0
  2. app.py +115 -0
  3. requirements.txt +0 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ GOOGLE_API_KEY="<REDACTED — this key was committed in plaintext; rotate it immediately and keep .env out of version control (add it to .gitignore)>"
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ import os
5
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
+ import google.generativeai as genai
7
+ from langchain.vectorstores import FAISS
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from langchain.chains.question_answering import load_qa_chain
10
+ from langchain.prompts import PromptTemplate
11
+ from dotenv import load_dotenv
12
+
13
# Load environment variables from .env so the API key never lives in source.
load_dotenv()
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key is None:
    # Halt the script run here: without a configured key every later
    # Gemini/embeddings call would fail with a cryptic downstream error.
    # NOTE(review): st.error at module level runs before main()'s
    # st.set_page_config — confirm Streamlit version tolerates this.
    st.error("GOOGLE_API_KEY is not set. Please set it in the .env file.")
    st.stop()
else:
    genai.configure(api_key=google_api_key)

# Chat history must survive Streamlit reruns, so it lives in session state.
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
24
+
25
# Function to extract text from PDF files
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of file-like objects accepted by PyPDF2.PdfReader
            (Streamlit UploadedFile objects work).

    Returns:
        A single string containing the text of all pages, in order.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for image-only/scanned pages;
            # the bare `text += page.extract_text()` would raise TypeError.
            text += page.extract_text() or ""
    return text
33
+
34
# Function to split text into chunks
def get_text_chunks(text):
    """Split *text* into overlapping chunks suitable for embedding.

    A 10,000-character window with 1,000-character overlap keeps context
    from being lost at chunk boundaries.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)
39
+
40
# Function to create and save a vector store from text chunks
def get_vector_store(text_chunks):
    """Embed *text_chunks* with Gemini embeddings and persist a FAISS index.

    Side effect: writes the index to the local "faiss_index" directory,
    which user_input() later reloads.
    """
    gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    store = FAISS.from_texts(text_chunks, embedding=gemini_embeddings)
    store.save_local("faiss_index")
45
+
46
# Function to load a question-answering chain
def get_conversational_chain():
    """Build a "stuff"-type QA chain backed by Gemini Pro.

    The prompt instructs the model to answer strictly from the supplied
    context and to refuse rather than fabricate an answer.

    Returns:
        A langchain QA chain taking {"input_documents", "question"}.
    """
    prompt_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details. If the answer is not in
    provided context, just say, "Answer is not available in the context." Don't provide the wrong answer.\n\n
    Context:\n{context}\n
    Question:\n{question}\n
    Answer:
    """
    qa_prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.5)
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
59
+
60
# Function to handle user input and generate a response
def user_input(user_question):
    """Answer *user_question* from the persisted FAISS index and show the reply.

    Side effects: appends a {"question", "answer"} dict to
    st.session_state.chat_history and writes the reply to the page.
    """
    # Guard: the index only exists after "Submit & Process" has been run;
    # FAISS.load_local on a missing directory raises an unhandled exception.
    if not os.path.exists("faiss_index"):
        st.error("No processed PDFs found. Please upload and process PDF files first.")
        return

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # allow_dangerous_deserialization is acceptable here only because the
    # index was written by get_vector_store() in this same app; never load
    # a FAISS index from an untrusted source this way.
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()

    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True
    )

    response_text = response["output_text"]

    # Update chat history
    st.session_state.chat_history.append({"question": user_question, "answer": response_text})

    st.write("Reply: ", response_text)
80
+
81
# Function to download chat history as a .txt file
def download_chat_history_txt():
    """Render the session chat history as plain text for download.

    Returns:
        One "Question: ...\\nAnswer: ...\\n" paragraph per exchange,
        joined by blank lines; empty string when there is no history.
    """
    paragraphs = []
    for entry in st.session_state.chat_history:
        paragraphs.append(f"Question: {entry['question']}\nAnswer: {entry['answer']}\n")
    return "\n".join(paragraphs)
86
+
87
# Main function to run the Streamlit app
def main():
    """Entry point: wire up the Streamlit UI for chatting with PDFs."""
    st.set_page_config(page_title="Chat PDF", layout="wide")
    st.header("Chat with PDF")

    user_question = st.text_input("Ask a Question from the PDF Files")
    if user_question:
        user_input(user_question)

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
        )
        if st.button("Submit & Process"):
            # Guard clause: reject an empty upload before doing any work.
            if not pdf_docs:
                st.error("Please upload at least one PDF file.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Done")

        # Only one download button
        chat_txt = download_chat_history_txt()
        st.download_button("Download Chat History (TXT)", chat_txt, file_name="chat_history.txt", mime="text/plain")

if __name__ == "__main__":
    main()
requirements.txt ADDED
File without changes